-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreferences.bib
More file actions
2429 lines (2252 loc) · 102 KB
/
references.bib
File metadata and controls
2429 lines (2252 loc) · 102 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% UN Comtrade trade-statistics database (online resource, corporate author).
@misc{UNComtrade2024,
  author       = {{United Nations Statistics Division}},
  title        = {{UN Comtrade Database: International Trade Statistics}},
  institution  = {{United Nations}},
  howpublished = {\url{https://comtrade.un.org/}},
  year         = {2024},
  note         = {Accessed: October 2025},
}
% Rank-biased overlap (RBO): similarity measure for incomplete ranked lists.
@article{webber10rbo,
  author     = {Webber, William and Moffat, Alistair and Zobel, Justin},
  title      = {A similarity measure for indefinite rankings},
  journal    = {ACM Trans. Inf. Syst.},
  volume     = {28},
  number     = {4},
  articleno  = {20},
  numpages   = {38},
  month      = nov,
  year       = {2010},
  issue_date = {November 2010},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {1046-8188},
  doi        = {10.1145/1852102.1852106},
  url        = {https://doi.org/10.1145/1852102.1852106},
  keywords   = {Rank correlation, probabilistic models, ranking},
  abstract   = {Ranked lists are encountered in research and daily life and it is often of interest to compare these lists even when they are incomplete or have only some members in common. An example is document rankings returned for the same query by different search engines. A measure of the similarity between incomplete rankings should handle nonconjointness, weight high ranks more heavily than low, and be monotonic with increasing depth of evaluation; but no measure satisfying all these criteria currently exists. In this article, we propose a new measure having these qualities, namely rank-biased overlap (RBO). The RBO measure is based on a simple probabilistic user model. It provides monotonicity by calculating, at a given depth of evaluation, a base score that is non-decreasing with additional evaluation, and a maximum score that is nonincreasing. An extrapolated score can be calculated between these bounds if a point estimate is required. RBO has a parameter which determines the strength of the weighting to top ranks. We extend RBO to handle tied ranks and rankings of different lengths. Finally, we give examples of the use of the measure in comparing the results produced by public search engines and in assessing retrieval systems in the laboratory.}
}
% XGI software paper (JOSS). Only the acronym and the language name are
% brace-protected so that bibliography styles can still apply their own casing.
@article{Landry_XGI_2023,
  author    = {Landry, Nicholas W. and Lucas, Maxime and Iacopini, Iacopo and Petri, Giovanni and Schwarze, Alice and Patania, Alice and Torres, Leo},
  title     = {{XGI}: A {Python} package for higher-order interaction networks},
  journal   = {Journal of Open Source Software},
  publisher = {The Open Journal},
  volume    = {8},
  number    = {85},
  pages     = {5162},
  month     = may,
  year      = {2023},
  doi       = {10.21105/joss.05162},
  url       = {https://doi.org/10.21105/joss.05162},
}
% Online news item: typed as misc because there is no journal, volume or pages
% to cite (an article entry without a journal triggers a required-field warning).
@misc{reuters2022,
  author = {{Reuters}},
  title  = {Exclusive: {Russia's} attack on {Ukraine} halts half of world's neon output chips clouding outlook},
  year   = {2022},
  note   = {Online: \url{https://www.reuters.com/technology/exclusive-ukraine-halts-half-worlds-neon-output-chips-clouding-outlook-2022-03-11/}}
}
% Company blog post: typed as misc because there is no journal to cite.
% The month name is brace-protected against sentence-casing styles.
@misc{affinitiv2022,
  author = {{Affinitiv}},
  title  = {The Automotive Chip Shortage Crisis: An {August} 2022 Update},
  year   = {2022},
  note   = {Online: \url{https://www.affinitiv.com/blog/the-automotive-chip-shortage-crisis-an-august-2022-update/}}
}
% Online trade-press item: typed as misc because there is no journal to cite.
@misc{fierceelectronics2022,
  author = {{Fierce Electronics}},
  title  = {{Ukraine} war could hurt supplies of neon, palladium needed for chips},
  year   = {2022},
  note   = {Online: \url{https://www.fierceelectronics.com/electronics/ukraine-war-could-hurt-supplies-neon-palladium-needed-chips}}
}
% Second author completed to "Last, First" form (was surname only).
% NOTE(review): given name taken from the public Princeton NLP post; confirm.
% The venue is stored in howpublished because misc entries ignore a journal field.
@misc{yangSocraticMethodSelfDiscovery2023,
  author       = {Yang, Runzhe and Narasimhan, Karthik},
  title        = {The {{Socratic Method}} for {{Self-Discovery}} in {{Large Language Models}}},
  howpublished = {Princeton NLP},
  month        = may,
  year         = {2023},
  urldate      = {2025-04-10},
}
% Carries both date and year, as the other dated entries in this file do, so
% that styles reading only the year field still print a year.
@article{costello_lifewire_2025,
  author       = {Costello, Sam},
  title        = {Where Is the {iPhone} Made?},
  journaltitle = {LifeWire: Tech for Humans},
  journal      = {LifeWire: Tech for Humans},
  publisher    = {Dotdash Meredith},
  date         = {2025-04-14},
  year         = 2025,
  url          = {https://www.lifewire.com/where-is-the-iphone-made-1999503}
}
% Carries both date and year, as the other dated entries in this file do.
% Publisher corrected to the organisation's actual name.
@article{rogin_zahn_2024,
  author       = {Rogin, Ali and Zahn, Harry},
  title        = {Hospitals nationwide grapple with {IV} fluid shortage caused by {Hurricane} {Helene}},
  journaltitle = {PBS News},
  journal      = {PBS News},
  publisher    = {Public Broadcasting Service},
  date         = {2024-12-14},
  year         = 2024,
  note         = {Accessed Jun 7, 2025},
  url          = {https://www.pbs.org/newshour/show/hospitals-nationwide-grapple-with-iv-fluid-shortage-caused-by-hurricane-helene}
}
% Carries both date and year, as the other dated entries in this file do.
% NOTE(review): date says 2024-10-08 but the URL path says 2024/10/09; confirm.
@article{jewett_2024,
  author       = {Jewett, Christina},
  title        = {{U.S.} Races to Replenish Storm-Battered Supplies of {IV} Fluids at Hospitals},
  journaltitle = {New York Times},
  journal      = {New York Times},
  publisher    = {The New York Times Company},
  date         = {2024-10-08},
  year         = 2024,
  note         = {Accessed Jun 7, 2025},
  url          = {https://www.nytimes.com/2024/10/09/health/hurricane-helene-iv-shortages.html}
}
% FAccT 2021 paper on differentially private synthetic data and fairness.
@inproceedings{cheng_can_2021,
  author     = {Cheng, Victoria and Suriyakumar, Vinith M. and Dullerud, Natalie and Joshi, Shalmali and Ghassemi, Marzyeh},
  title      = {Can You Fake It Until You Make It? {I}mpacts of Differentially Private Synthetic Data on Downstream Classification Fairness},
  shorttitle = {Can You Fake It Until You Make It?},
  booktitle  = {Proceedings of the 2021 {ACM} Conference on Fairness, Accountability, and Transparency},
  series     = {{FAccT} '21},
  pages      = {149--160},
  publisher  = {Association for Computing Machinery},
  location   = {New York, {NY}, {USA}},
  address    = {New York, {NY}, {USA}},
  isbn       = {978-1-4503-8309-7},
  doi        = {10.1145/3442188.3445879},
  url        = {https://doi.org/10.1145/3442188.3445879},
  urldate    = {2021-03-14},
  date       = {2021-03-03},
  year       = 2021
}
% USENIX Security 2019 paper. The pages field now holds the page range (it held
% a page count), and booktitle/publisher follow the naming used by the other
% USENIX Security entry in this file. The epsilon symbols lost from the
% abstract are restored.
% NOTE(review): page range taken from the published proceedings; confirm.
@inproceedings{jayaraman_evaluating_2019,
  author    = {Jayaraman, Bargav and Evans, David},
  title     = {Evaluating Differentially Private Machine Learning in Practice},
  booktitle = {28th {USENIX} Security Symposium ({USENIX} Security 19)},
  pages     = {1895--1912},
  publisher = {{USENIX} Association},
  location  = {Santa Clara, {CA}, {USA}},
  date      = {2019-08},
  year      = 2019,
  langid    = {english},
  keywords  = {differential privacy, evaluation, machine learning, measurement, notes:obsidian, read},
  abstract  = {Differential privacy is a strong notion for privacy that can be used to prove formal guarantees, in terms of a privacy budget, $\epsilon$, about how much information is leaked by a mechanism. When used in privacy-preserving machine learning, the goal is typically to limit what can be inferred from the model about individual training records. However, the calibration of the privacy budget is not well understood. Implementations of privacy-preserving machine learning often select large values of $\epsilon$ in order to get acceptable utility of the model, with little understanding of the impact of such choices on meaningful privacy. Moreover, in scenarios where iterative learning procedures are used, relaxed definitions of differential privacy are often used which appear to reduce the needed privacy budget but present poorly understood trade-offs between privacy and utility. In this paper, we quantify the impact of these choices on privacy in experiments with logistic regression and neural network models. Our main finding is that there is no way to obtain privacy for free—relaxed definitions of differential privacy that reduce the amount of noise needed to improve utility also increase the measured privacy leakage. Current mechanisms for differentially private machine learning rarely offer acceptable utility-privacy trade-offs for complex learning tasks: settings that provide limited accuracy loss provide little effective privacy, and settings that provide strong privacy result in useless models.},
}
% Synthetic Data Vault (SDV): generative modelling of relational databases.
@inproceedings{patki_synthetic_2016,
  author     = {Patki, Neha and Wedge, Roy and Veeramachaneni, Kalyan},
  title      = {The {S}ynthetic {D}ata {V}ault},
  booktitle  = {2016 {IEEE} International Conference on Data Science and Advanced Analytics ({DSAA})},
  eventtitle = {2016 {IEEE} International Conference on Data Science and Advanced Analytics ({DSAA})},
  pages      = {399--410},
  date       = {2016-10},
  year       = 2016,
  doi        = {10.1109/DSAA.2016.49},
  keywords   = {Computational modeling, crowd sourcing, Data models, data science, Databases, Hidden Markov models, Numerical models, predictive modeling, Predictive models, Synthetic data generation},
  abstract   = {The goal of this paper is to build a system that automatically creates synthetic data to enable data science endeavors. To achieve this, we present the Synthetic Data Vault ({SDV}), a system that builds generative models of relational databases. We are able to sample from the model and create synthetic data, hence the name {SDV}. When implementing the {SDV}, we also developed an algorithm that computes statistics at the intersection of related database tables. We then used a state-of-the-art multivariate modeling approach to model this data. The {SDV} iterates through all possible relations, ultimately creating a model for the entire database. Once this model is computed, the same relational information allows the {SDV} to synthesize data by sampling from any part of the database. After building the {SDV}, we used it to generate synthetic data for five different publicly available datasets. We then published these datasets, and asked data scientists to develop predictive models for them as part of a crowdsourced experiment. By analyzing the outcomes, we show that synthetic data can successfully replace original data for data science. Our analysis indicates that there is no significant difference in the work produced by data scientists who used synthetic data as opposed to real data. We conclude that the {SDV} is a viable solution for synthetic data generation.},
}
% ARES 2019 paper: utility of synthetic data on machine learning tasks.
@inproceedings{hittmeir_utility_2019,
  author     = {Hittmeir, Markus and Ekelhart, Andreas and Mayer, Rudolf},
  title      = {On the Utility of Synthetic Data: {A}n Empirical Evaluation on Machine Learning Tasks},
  shorttitle = {On the Utility of Synthetic Data},
  booktitle  = {Proceedings of the 14th International Conference on Availability, Reliability and Security},
  series     = {{ARES} '19},
  pages      = {1--6},
  publisher  = {Association for Computing Machinery},
  location   = {New York, {NY}, {USA}},
  address    = {New York, {NY}, {USA}},
  isbn       = {978-1-4503-7164-3},
  doi        = {10.1145/3339252.3339281},
  url        = {https://dl.acm.org/doi/10.1145/3339252.3339281},
  urldate    = {2023-04-06},
  date       = {2019-08-26},
  year       = 2019,
}
% IEEE Big Data 2019 paper: utility and privacy of synthetic data for regression.
@inproceedings{hittmeir_utility_2019-1,
  author     = {Hittmeir, Markus and Ekelhart, Andreas and Mayer, Rudolf},
  title      = {Utility and Privacy Assessments of Synthetic Data for Regression Tasks},
  booktitle  = {2019 {IEEE} International Conference on Big Data (Big Data)},
  eventtitle = {2019 {IEEE} International Conference on Big Data (Big Data)},
  pages      = {5763--5772},
  date       = {2019-12},
  year       = 2019,
  doi        = {10.1109/BigData47090.2019.9005476},
  keywords   = {Attribute Disclosure Assessment, Data models, Data privacy, Machine learning, Machine Learning, Measurement, Privacy, Regression, Synthesizers, Synthetic Data, Task analysis, Tools},
  abstract   = {With ever increasing capacity for collecting, storing, and processing of data, there is also a high demand for intelligent data analysis methods. While there have been impressive advances in machine learning and similar domains in recent years, this also gives rise to concerns regarding the protection of personal and otherwise sensitive data, especially if it is to be analysed by third parties. Besides anonymisation, which becomes challenging with high dimensional data, one approach for privacy-preserving data mining lies in the usage of synthetic data, which comes with the promise of protecting the users’ data and producing analysis results close to those achieved by using real data. In this paper, we analyse a number of different approaches for creating synthetic data, and study the utility of the created datasets for regression tasks, i.e. the prediction of a numeric value. We further investigate the similarity of real and synthetic data samples. Finally, we contribute to privacy assessments and measurements of the risk of attribute disclosure on synthetic data by extending an approach developed for categorical data.},
}
% IEEE Access journal article. The reference-manager residue
% "Conference Name: IEEE Access" was removed from note so that it is not
% printed in the bibliography.
@article{dankar_multi-dimensional_2022,
  author       = {Dankar, Fida K. and Ibrahim, Mahmoud K. and Ismail, Leila},
  title        = {A Multi-Dimensional Evaluation of Synthetic Data Generators},
  journaltitle = {{IEEE} Access},
  journal      = {{IEEE} Access},
  volume       = {10},
  pages        = {11147--11158},
  issn         = {2169-3536},
  doi          = {10.1109/ACCESS.2022.3144765},
  date         = {2022},
  year         = 2022,
  keywords     = {Correlation, Data models, Data utility, Generators, Machine learning, Measurement, Medical services, privacy enhancing technologies, Sociology, synthetic data generators},
}
% USENIX Security 2022 paper on synthetic data as an anonymisation mechanism.
@inproceedings{stadler_synthetic_2022,
  author    = {Stadler, Theresa and Oprisanu, Bristena and Troncoso, Carmela},
  title     = {Synthetic Data {\textendash} Anonymisation {G}roundhog {D}ay},
  booktitle = {31st USENIX Security Symposium (USENIX Security 22)},
  pages     = {1451--1468},
  publisher = {USENIX Association},
  address   = {Boston, MA},
  month     = aug,
  year      = {2022},
  isbn      = {978-1-939133-31-1},
  url       = {https://www.usenix.org/conference/usenixsecurity22/presentation/stadler}
}
% SSDBM 2017 paper presenting the DataSynthesizer tool.
@inproceedings{ping_datasynthesizer_2017,
  author     = {Ping, Haoyue and Stoyanovich, Julia and Howe, Bill},
  title      = {{DataSynthesizer}: {P}rivacy-Preserving Synthetic Datasets},
  shorttitle = {{DataSynthesizer}},
  booktitle  = {Proceedings of the 29th International Conference on Scientific and Statistical Database Management},
  series     = {{SSDBM} '17},
  pages      = {1--5},
  publisher  = {Association for Computing Machinery},
  location   = {New York, {NY}, {USA}},
  isbn       = {978-1-4503-5282-6},
  doi        = {10.1145/3085504.3091117},
  url        = {https://dl.acm.org/doi/10.1145/3085504.3091117},
  urldate    = {2023-04-18},
  date       = {2017-06-27},
  year       = 2017,
  abstract   = {To facilitate collaboration over sensitive data, we present {DataSynthesizer}, a tool that takes a sensitive dataset as input and generates a structurally and statistically similar synthetic dataset with strong privacy guarantees. The data owners need not release their data, while potential collaborators can begin developing models and methods with some confidence that their results will work similarly on the real dataset. The distinguishing feature of {DataSynthesizer} is its usability --- the data owner does not have to specify any parameters to start generating and sharing data safely and effectively. {DataSynthesizer} consists of three high-level modules --- {DataDescriber}, {DataGenerator} and {ModelInspector}. The first, {DataDescriber}, investigates the data types, correlations and distributions of the attributes in the private dataset, and produces a data summary, adding noise to the distributions to preserve privacy. {DataGenerator} samples from the summary computed by {DataDescriber} and outputs synthetic data. {ModelInspector} shows an intuitive description of the data summary that was computed by {DataDescriber}, allowing the data owner to evaluate the accuracy of the summarization process and adjust any parameters, if desired. We describe {DataSynthesizer} and illustrate its use in an urban science context, where sharing sensitive, legally encumbered data between agencies and with outside collaborators is reported as the primary obstacle to data-driven governance. The code implementing all parts of this work is publicly available at https://github.com/{DataResponsibly}/{DataSynthesizer}.},
}
% Journal article. The journal alias is supplied next to journaltitle, as the
% other dual-format articles in this file do, so that styles reading only the
% journal field do not report a missing journal.
@article{boedihardjo_covariances_2022,
  author       = {Boedihardjo, March and Strohmer, Thomas and Vershynin, Roman},
  title        = {Covariance’s Loss is Privacy’s Gain: {C}omputationally Efficient, Private and Accurate Synthetic Data},
  shorttitle   = {Covariance’s Loss is Privacy’s Gain},
  journaltitle = {Foundations of Computational Mathematics},
  journal      = {Foundations of Computational Mathematics},
  shortjournal = {Found Comput Math},
  issn         = {1615-3383},
  doi          = {10.1007/s10208-022-09591-7},
  url          = {https://doi.org/10.1007/s10208-022-09591-7},
  urldate      = {2023-04-19},
  date         = {2022-09-20},
  year         = 2022,
  langid       = {english},
  abstract     = {The protection of private information is of vital importance in data-driven research, business and government. The conflict between privacy and utility has triggered intensive research in the computer science and statistics communities, who have developed a variety of methods for privacy-preserving data release. Among the main concepts that have emerged are anonymity and differential privacy. Today, another solution is gaining traction, synthetic data. However, the road to privacy is paved with {NP}-hard problems. In this paper, we focus on the {NP}-hard challenge to develop a synthetic data generation method that is computationally efficient, comes with provable privacy guarantees and rigorously quantifies data utility. We solve a relaxed version of this problem by studying a fundamental, but a first glance completely unrelated, problem in probability concerning the concept of covariance loss. Namely, we find a nearly optimal and constructive answer to the question how much information is lost when we take conditional expectation. Surprisingly, this excursion into theoretical probability produces mathematical techniques that allow us to derive constructive, approximately optimal solutions to difficult applied problems concerning microaggregation, privacy and synthetic data.},
}
% CODASPY 2020 paper on attribute disclosure risk in synthetic data.
@inproceedings{hittmeir_baseline_2020,
  author    = {Hittmeir, Markus and Mayer, Rudolf and Ekelhart, Andreas},
  title     = {A Baseline for Attribute Disclosure Risk in Synthetic Data},
  booktitle = {Proceedings of the Tenth {ACM} Conference on Data and Application Security and Privacy},
  series    = {{CODASPY} '20},
  pages     = {133--143},
  publisher = {Association for Computing Machinery},
  location  = {New York, {NY}, {USA}},
  isbn      = {978-1-4503-7107-0},
  doi       = {10.1145/3374664.3375722},
  url       = {https://dl.acm.org/doi/10.1145/3374664.3375722},
  urldate   = {2023-04-19},
  date      = {2020-03-16},
  year      = 2020,
  abstract  = {The generation of synthetic data is widely considered as viable method for alleviating privacy concerns and for reducing identification and attribute disclosure risk in micro-data. The records in a synthetic dataset are artificially created and thus do not directly relate to individuals in the original data in terms of a 1-to-1 correspondence. As a result, inferences about said individuals appear to be infeasible and, simultaneously, the utility of the data may be kept at a high level. In this paper, we challenge this belief by interpreting the standard attacker model for attribute disclosure as classification problem. We show how disclosure risk measures presented in recent publications may be compared to or even be reformulated as machine learning classification models. Our overall goal is to empirically analyze attribute disclosure risk in synthetic data and to discuss its close relationship to data utility. Moreover, we improve the baseline for attribute disclosure risk from the attacker's perspective by applying variants of the {RadiusNearestNeighbor} and the {EnsembleVote} classifier.},
}
% Magazine article. The journal alias is supplied next to journaltitle for
% styles that read only the journal field, and the reference-manager residue
% "Conference Name: ..." was removed from note so that it is not printed.
@article{el_emam_seven_2020,
  author       = {El Emam, Khaled},
  title        = {Seven Ways to Evaluate the Utility of Synthetic Data},
  journaltitle = {{IEEE} Security \& Privacy},
  journal      = {{IEEE} Security \& Privacy},
  volume       = {18},
  number       = {4},
  pages        = {56--59},
  issn         = {1558-4046},
  doi          = {10.1109/MSEC.2020.2992821},
  date         = {2020-07},
  year         = 2020,
  keywords     = {{COVID}-19, Data models, Data privacy, Information sharing, Public healthcare, Stability analysis, Viruses (medical)},
  abstract     = {Access to individual-level health data is going to be critical for managing the {COVID}-19 pandemic and enabling society to return to some form of (new) normal functioning. Broader data access is already starting to happen. At the same time, there has been growing alarm by the privacy community about the extent and manner of the level of data sharing that is going on with such sensitive information. In South Korea, broad data sharing has already resulted in some patients being reidentified and experiencing judgment and ridicule,1,2 and some governments have begun to reduce the amount of information being shared about {COVID}-19 cases.3-8 Data synthesis can provide a solution by enabling access to useful information while ensuring reasonable privacy protections.},
}
% ICML 2022 paper (PMLR volume 162, per the proceedings URL).
% The third author's surname is "De Cristofaro"; it was split so that "De"
% became part of the given name. The ISSN now lives in its own field, not note.
@inproceedings{ganev_robin_2022,
  author     = {Ganev, Georgi and Oprisanu, Bristena and De Cristofaro, Emiliano},
  title      = {Robin {H}ood and {M}atthew Effects: {D}ifferential Privacy Has Disparate Impact on Synthetic Data},
  shorttitle = {Robin {H}ood and {M}atthew Effects},
  booktitle  = {Proceedings of the 39th International Conference on Machine Learning},
  eventtitle = {International Conference on Machine Learning},
  series     = {Proceedings of Machine Learning Research},
  volume     = {162},
  pages      = {6944--6959},
  publisher  = {{PMLR}},
  address    = {Baltimore, MD},
  issn       = {2640-3498},
  url        = {https://proceedings.mlr.press/v162/ganev22a.html},
  urldate    = {2023-04-19},
  date       = {2022-06-28},
  year       = 2022,
  langid     = {english},
}
% Chapter in the "Theory of Cryptography" volume. The series name was stored in
% note as "Series Title: ..."; it now lives in the series field so that styles
% print it together with the volume number.
@incollection{hutchison_pcps_2011,
  author      = {Ullman, Jonathan and Vadhan, Salil},
  title       = {{PCPs} and the Hardness of Generating Private Synthetic Data},
  booktitle   = {Theory of Cryptography},
  editor      = {Ishai, Yuval},
  editorb     = {Hutchison, David and Kanade, Takeo and Kittler, Josef and Kleinberg, Jon M. and Mattern, Friedemann and Mitchell, John C. and Naor, Moni and Nierstrasz, Oscar and Pandu Rangan, C. and Steffen, Bernhard and Sudan, Madhu and Terzopoulos, Demetri and Tygar, Doug and Vardi, Moshe Y. and Weikum, Gerhard},
  editorbtype = {redactor},
  series      = {Lecture Notes in Computer Science},
  volume      = {6597},
  pages       = {400--416},
  publisher   = {Springer Berlin Heidelberg},
  location    = {Berlin, Heidelberg},
  isbn        = {978-3-642-19570-9 978-3-642-19571-6},
  doi         = {10.1007/978-3-642-19571-6_24},
  url         = {http://link.springer.com/10.1007/978-3-642-19571-6_24},
  urldate     = {2023-04-19},
  date        = {2011},
  year        = 2011,
  langid      = {english},
  abstract    = {Assuming the existence of one-way functions, we show that there is no polynomial-time, differentially private algorithm A that takes a database D ∈ (\{0, 1\}d)n and outputs a “synthetic database” D all of whose two-way marginals are approximately equal to those of D. (A two-way marginal is the fraction of database rows x ∈ \{0, 1\}d with a given pair of values in a given pair of columns.) This answers a question of Barak et al. ({PODS} ‘07), who gave an algorithm running in time poly(n, 2d).},
}
% Synthetic epidemic dataset hosted on the University of Virginia Dataverse.
@misc{harrison_synthetic_2023,
  author      = {Harrison, Galen and Chen, Jiangzhuo and Mortveit, Henning and Hoops, Stefan and Porebski, Przemyslaw and Xie, Dawen and Wilson, Amanda and Bhattacharya, Parantapa and Vullikanti, Anil and Xiong, Li and Marathe, Madhav},
  editora     = {Porebski, Przemyslaw},
  editoratype = {collaborator},
  title       = {Synthetic Data To Support {US}-{UK} Prize Challenge For Developing Privacy Enhancing Methods: {P}redicting Individual Infection Risk During A Pandemic},
  shorttitle  = {Synthetic Data To Support {US}-{UK} Prize Challenge For Developing Privacy Enhancing Methods},
  publisher   = {University of Virginia Dataverse},
  date        = {2023},
  year        = 2023,
  doi         = {10.18130/V3/ZOG1FF},
  url         = {https://dataverse.lib.virginia.edu/citation?persistentId=doi:10.18130/V3/ZOG1FF},
  urldate     = {2023-04-19},
  langid      = {english},
  note        = {Type: dataset},
  abstract    = {Abstract. This document describes synthetically produced epidemic data to support the 2022 {US}-{UK} Prize Challenge focused on advancing privacy-enhancing technologies ({PETs}). Announced by the White House in December 2021, this challenge is part of a series of International Grand Challenges on Democracy-Affirming Technologies; see https://www.whitehouse.gov/ ostp/news-updates/2021/12/08/us-and-uk-to-partner-on-a-prize-challenges-toadvance-privacy-enhancing-technologies/ for more details. In this challenge, participants will attempt to predict the likelihood of an individual getting infected by a disease in a privacy-preserving manner. This task, while of high public health relevance, has been hampered by data availability. The challenge participants will be able to develop and demonstrate their proposed solutions using a synthetic dataset that we have specifically created for this challenge. The synthetic data is developed by integrating realistic data to produce a synthetic social contact network, along with a synthetic outbreak that is similar to the {COVID}-19 pandemic. This dataset, which is very detailed and realistic, serves as ground truth. In addition to the description of the synthetic data, this document also describes three centralized baselines that can be used to evaluate the performance of the proposed methods.},
}
% Scientific Data article. The two-line reference-manager residue in note
% ("Number: 1 / Publisher: ...") was removed: the issue number is already in
% number and the publisher now has its own field. The journal alias is supplied
% next to journaltitle for styles that read only the journal field.
@article{thorve_high_2023,
  author       = {Thorve, Swapna and Baek, Young Yun and Swarup, Samarth and Mortveit, Henning and Marathe, Achla and Vullikanti, Anil and Marathe, Madhav},
  title        = {High resolution synthetic residential energy use profiles for the {U}nited {S}tates},
  journaltitle = {Scientific Data},
  journal      = {Scientific Data},
  shortjournal = {Sci Data},
  volume       = {10},
  number       = {1},
  pages        = {76},
  publisher    = {Nature Publishing Group},
  rights       = {2023 The Author(s)},
  issn         = {2052-4463},
  doi          = {10.1038/s41597-022-01914-1},
  url          = {https://www.nature.com/articles/s41597-022-01914-1},
  urldate      = {2023-04-21},
  date         = {2023-02-06},
  year         = 2023,
  langid       = {english},
  keywords     = {Energy and behaviour, Energy modelling},
  abstract     = {Efficient energy consumption is crucial for achieving sustainable energy goals in the era of climate change and grid modernization. Thus, it is vital to understand how energy is consumed at finer resolutions such as household in order to plan demand-response events or analyze impacts of weather, electricity prices, electric vehicles, solar, and occupancy schedules on energy consumption. However, availability and access to detailed energy-use data, which would enable detailed studies, has been rare. In this paper, we release a unique, large-scale, digital-twin of residential energy-use dataset for the residential sector across the contiguous United States covering millions of households. The data comprise of hourly energy use profiles for synthetic households, disaggregated into Thermostatically Controlled Loads ({TCL}) and appliance use. The underlying framework is constructed using a bottom-up approach. Diverse open-source surveys and first principles models are used for end-use modeling. Extensive validation of the synthetic dataset has been conducted through comparisons with reported energy-use data. We present a detailed, open, high resolution, residential energy-use dataset for the United States.},
}
@article{venkatramanan_using_2018,
title = {Using data-driven agent-based models for forecasting emerging infectious diseases},
volume = {22},
issn = {1755-4365},
url = {https://www.sciencedirect.com/science/article/pii/S1755436517300221},
doi = {10.1016/j.epidem.2017.02.010},
series = {The {RAPIDD} Ebola Forecasting Challenge},
abstract = {Producing timely, well-informed and reliable forecasts for an ongoing epidemic of an emerging infectious disease is a huge challenge. Epidemiologists and policy makers have to deal with poor data quality, limited understanding of the disease dynamics, rapidly changing social environment and the uncertainty on effects of various interventions in place. Under this setting, detailed computational models provide a comprehensive framework for integrating diverse data sources into a well-defined model of disease dynamics and social behavior, potentially leading to better understanding and actions. In this paper, we describe one such agent-based model framework developed for forecasting the 2014–2015 Ebola epidemic in Liberia, and subsequently used during the Ebola forecasting challenge. We describe the various components of the model, the calibration process and summarize the forecast performance across scenarios of the challenge. We conclude by highlighting how such a data-driven approach can be refined and adapted for future epidemics, and share the lessons learned over the course of the challenge.},
pages = {43--49},
journal = {Epidemics},
journaltitle = {Epidemics},
shortjournal = {Epidemics},
author = {Venkatramanan, Srinivasan and Lewis, Bryan and Chen, Jiangzhuo and Higdon, Dave and Vullikanti, Anil and Marathe, Madhav},
urldate = {2023-04-21},
date = {2018-03-01},
langid = {english},
keywords = {Agent-based models, Bayesian calibration, Ebola, Emerging infectious diseases, Simulation optimization},
year = 2018,
}
@article{rubin_statistical_1993,
title = {Statistical Disclosure Limitation},
volume = {9},
pages = {461--468},
number = {2},
journal = {Journal of Official Statistics},
journaltitle = {Journal of Official Statistics},
author = {Rubin, Donald B.},
date = {1993},
year = 1993,
}
@article{yang_differential_2012,
title = {Differential Privacy for Protecting Multi-dimensional Contingency Table Data: {Extensions} and Applications},
volume = {4},
issn = {2575-8527},
url = {https://journalprivacyconfidentiality.org/index.php/jpc/article/view/613},
doi = {10.29012/jpc.v4i1.613},
shorttitle = {Differential Privacy for Protecting Multi-dimensional Contingency Table Data},
number = {1},
journal = {Journal of Privacy and Confidentiality},
journaltitle = {Journal of Privacy and Confidentiality},
shortjournal = {{JPC}},
author = {Yang, Xiaolin and Fienberg, Stephen E. and Rinaldo, Alessandro},
urldate = {2023-05-02},
date = {2012-07-20},
langid = {english},
year = 2012,
}
@article{zhang_privbayes_2017,
title = {{PrivBayes}: {Private} Data Release via {Bayesian} Networks},
volume = {42},
issn = {0362-5915},
url = {https://dl.acm.org/doi/10.1145/3134428},
doi = {10.1145/3134428},
shorttitle = {{PrivBayes}},
pages = {25:1--25:41},
number = {4},
journal = {{ACM} Transactions on Database Systems},
journaltitle = {{ACM} Transactions on Database Systems},
shortjournal = {{ACM} Trans. Database Syst.},
author = {Zhang, Jun and Cormode, Graham and Procopiuc, Cecilia M. and Srivastava, Divesh and Xiao, Xiaokui},
urldate = {2023-05-02},
date = {2017-10-27},
year = 2017,
}
@inproceedings{abowd_how_2008,
  author    = {Abowd, John M. and Vilhuber, Lars},
  editor    = {Domingo-Ferrer, Josep and Saygın, Yücel},
  title     = {How Protective Are Synthetic Data?},
  booktitle = {Privacy in Statistical Databases},
  pages     = {239--246},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  location  = {Berlin, Heidelberg},
  date      = {2008},
  year      = {2008},
  isbn      = {978-3-540-87471-3},
}
@article{dwork_algorithmic_2013,
title = {The Algorithmic Foundations of Differential Privacy},
volume = {9},
issn = {1551-305X, 1551-3068},
url = {http://www.nowpublishers.com/articles/foundations-and-trends-in-theoretical-computer-science/TCS-042},
doi = {10.1561/0400000042},
pages = {211--407},
number = {3--4},
journal = {Foundations and Trends® in Theoretical Computer Science},
journaltitle = {Foundations and Trends® in Theoretical Computer Science},
shortjournal = {{FNT} in Theoretical Computer Science},
author = {Dwork, Cynthia and Roth, Aaron},
urldate = {2022-05-17},
date = {2013},
year = 2013,
langid = {english},
}
@unpublished{samarth_swarup_generating_2017,
location = {São Paulo, Brazil},
title = {Generating Synthetic Populations for Social Modeling},
url = {https://biocomplexity.virginia.edu/sites/default/files/staff/madhavtut2.pdf},
type = {Technical Report},
howpublished = {Technical Report},
note = {{AAMAS}},
author = {Swarup, Samarth and Marathe, Madhav},
urldate = {2022-05-03},
date = {2017},
year = 2017,
}
@article{cohenPrivateNumbersPublic2022,
  author     = {Cohen, Aloni and Duchin, Moon and Matthews, J. N. and Suwal, Bhushan},
  title      = {Private {{Numbers}} in {{Public Policy}}: {{Census}}, {{Differential Privacy}}, and {{Redistricting}}},
  shorttitle = {Private {{Numbers}} in {{Public Policy}}},
  journal    = {Harvard Data Science Review},
  number     = {Special Issue 2},
  year       = {2022},
  month      = jun,
  issn       = {2644-2353, 2688-8513},
  doi        = {10.1162/99608f92.22fd8a0e},
  urldate    = {2023-04-28},
  langid     = {english},
  file       = {/Users/galenharrison/Zotero/storage/DTW68FCX/Cohen et al_2022_Private Numbers in Public Policy.pdf},
}
@inproceedings{chenGSWGANGradientSanitizedApproach2020,
title = {{{GS-WGAN}}: {{A Gradient-Sanitized Approach}} for {{Learning Differentially Private Generators}}},
shorttitle = {{{GS-WGAN}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Chen, Dingfan and Orekondy, Tribhuvanesh and Fritz, Mario},
year = {2020},
volume = {33},
pages = {12673--12684},
publisher = {{Curran Associates, Inc.}},
address = {Vancouver, Canada},
urldate = {2023-05-10},
}
@inproceedings{harderDPMERFDifferentiallyPrivate2021,
title = {{{DP-MERF}}: {{Differentially Private Mean Embeddings}} with {{Random Features}} for {{Practical Privacy-preserving Data Generation}}},
shorttitle = {{{DP-MERF}}},
booktitle = {Proceedings of {{The}} 24th {{International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
author = {Harder, Frederik and Adamczewski, Kamil and Park, Mijung},
year = {2021},
month = mar,
pages = {1819--1827},
publisher = {{PMLR}},
issn = {2640-3498},
urldate = {2023-05-09},
langid = {english},
address = {Online}
}
@techreport{sweeneySimpleDemographicsOften2000,
title = {Simple {{Demographics Often Identify People Uniquely}}},
author = {Sweeney, Latanya},
year = {2000},
month = jan,
type = {Data Privacy Working Paper},
number = {3},
institution = {Carnegie Mellon University},
address = {Pittsburgh, PA},
pages = {1--34},
langid = {english},
file = {/Users/galenharrison/Zotero/storage/I8K8UU4C/Sweeney - Simple Demographics Often Identify People Uniquely.pdf}
}
@inproceedings{narayananRobustDeanonymizationLarge2008,
title = {Robust {{De-anonymization}} of {{Large Sparse Datasets}}},
booktitle = {2008 {{IEEE Symposium}} on {{Security}} and {{Privacy}} ({{SP}} 2008)},
author = {Narayanan, Arvind and Shmatikov, Vitaly},
year = {2008},
month = may,
pages = {111--125},
publisher = {{IEEE}},
address = {{Oakland, CA, USA}},
issn = {1081-6011},
doi = {10.1109/SP.2008.33},
urldate = {2022-11-09},
isbn = {978-0-7695-3168-7},
langid = {english},
file = {/Users/galenharrison/Zotero/storage/UCKLSVAM/Narayanan and Shmatikov - 2008 - Robust De-anonymization of Large Sparse Datasets.pdf}
}
@incollection{ullmanPCPsHardnessGenerating2011,
title = {{{PCPs}} and the {{Hardness}} of {{Generating Private Synthetic Data}}},
booktitle = {Theory of {{Cryptography}}},
author = {Ullman, Jonathan and Vadhan, Salil},
editor = {Ishai, Yuval},
year = {2011},
series = {Lecture Notes in Computer Science},
volume = {6597},
pages = {400--416},
publisher = {{Springer Berlin Heidelberg}},
address = {{Berlin, Heidelberg}},
doi = {10.1007/978-3-642-19571-6_24},
urldate = {2023-04-19},
isbn = {978-3-642-19570-9 978-3-642-19571-6},
langid = {english},
file = {/Users/galenharrison/Zotero/storage/42A25438/Ullman and Vadhan - 2011 - PCPs and the Hardness of Generating Private Synthe.pdf}
}
@article{mckennaOptimizingErrorHighdimensional2018,
  author  = {McKenna, Ryan and Miklau, Gerome and Hay, Michael and Machanavajjhala, Ashwin},
  title   = {Optimizing Error of High-Dimensional Statistical Queries under Differential Privacy},
  journal = {Proceedings of the VLDB Endowment},
  volume  = {11},
  number  = {10},
  pages   = {1206--1219},
  year    = {2018},
  month   = jun,
  issn    = {2150-8097},
  doi     = {10.14778/3231751.3231769},
  urldate = {2023-05-30},
}
@inproceedings{mckennaGraphicalmodelBasedEstimation2019,
title = {Graphical-Model Based Estimation and Inference for Differential Privacy},
booktitle = {Proceedings of the 36th {{International Conference}} on {{Machine Learning}}},
author = {McKenna, Ryan and Sheldon, Daniel and Miklau, Gerome},
year = {2019},
month = may,
pages = {4435--4444},
publisher = {{PMLR}},
issn = {2640-3498},
urldate = {2023-05-09},
langid = {english},
address = {Long Beach, CA}
}
@inproceedings{zhangPrivSynDifferentiallyPrivate2021,
title = {{{PrivSyn}}: {{Differentially Private Data Synthesis}}},
shorttitle = {{{PrivSyn}}},
booktitle = {30th {{USENIX Security Symposium}} ({{USENIX Security}} 21)},
author = {Zhang, Zhikun and Wang, Tianhao and Li, Ninghui and Honorio, Jean and Backes, Michael and He, Shibo and Chen, Jiming and Zhang, Yang},
year = {2021},
pages = {929--946},
urldate = {2023-05-02},
isbn = {978-1-939133-24-3},
langid = {english},
}
@article{vietriPrivateSyntheticData2022a,
  author  = {Vietri, Giuseppe and Archambeau, Cedric and Aydore, Sergul and Brown, William and Kearns, Michael and Roth, Aaron and Siva, Ankit and Tang, Shuai and Wu, Steven Z.},
  title   = {Private Synthetic Data for Multitask Learning and Marginal Queries},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {35},
  pages   = {18282--18295},
  year    = {2022},
}
@inproceedings{aydoreDifferentiallyPrivateQuery2021a,
title = {Differentially {{Private Query Release Through Adaptive Projection}}},
booktitle = {Proceedings of the 38th {{International Conference}} on {{Machine Learning}}},
author = {Aydore, Sergul and Brown, William and Kearns, Michael and Kenthapadi, Krishnaram and Melis, Luca and Roth, Aaron and Siva, Ankit A.},
year = {2021},
month = jul,
pages = {457--467},
publisher = {{PMLR}},
issn = {2640-3498},
urldate = {2023-05-09},
langid = {english},
address = {Online}
}
@misc{xieDifferentiallyPrivateGenerative2018,
title = {Differentially Private Generative Adversarial Network},
url = {http://arxiv.org/abs/1802.06739},
doi = {10.48550/arXiv.1802.06739},
number = {{arXiv}:1802.06739},
publisher = {{arXiv}},
author = {Xie, Liyang and Lin, Kaixiang and Wang, Shu and Wang, Fei and Zhou, Jiayu},
urldate = {2023-05-09},
date = {2018-02-19},
year = {2018},
eprinttype = {arxiv},
eprint = {1802.06739},
eprintclass = {cs, stat},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Computer Science - Cryptography and Security},
}
@inproceedings{torkzadehmahaniDPCGANDifferentiallyPrivate2019,
  author     = {Torkzadehmahani, Reihaneh and Kairouz, Peter and Paten, Benedict},
  title      = {{{DP-CGAN}}: {{Differentially Private Synthetic Data}} and {{Label Generation}}},
  shorttitle = {{{DP-CGAN}}},
  booktitle  = {2019 {{IEEE}}/{{CVF Conference}} on {{Computer Vision}} and {{Pattern Recognition Workshops}} ({{CVPRW}})},
  pages      = {98--104},
  publisher  = {{IEEE}},
  address    = {{Long Beach, CA, USA}},
  year       = {2019},
  month      = jun,
  isbn       = {978-1-72812-506-0},
  doi        = {10.1109/CVPRW.2019.00018},
  urldate    = {2023-05-10},
  langid     = {english},
}
@inproceedings{jordonPATEGANGeneratingSynthetic2018,
title = {{{PATE-GAN}}: {{Generating Synthetic Data}} with {{Differential Privacy Guarantees}}},
shorttitle = {{{PATE-GAN}}},
booktitle = {International {{Conference}} on {{Learning Representations}}},
author = {Jordon, James and Yoon, Jinsung and van der Schaar, Mihaela},
year = {2019},
publisher = {ICLR},
month = may,
urldate = {2023-05-09},
langid = {english},
address = {New Orleans, LA},
}
@misc{alzantotDifferentialPrivacySynthetic2019,
  author = {Alzantot, Moustafa and Srivastava, Mani},
  title  = {Differential {{Privacy Synthetic Data Generation}} Using {{WGANs}}},
  year   = {2019},
  file   = {/Users/galenharrison/Zotero/storage/NYBXXMLH/Alzantot_Srivastava_2019_Differential Privacy Synthetic Data Generation using WGANs.pdf},
}
@misc{dingRetiringAdultNew2022,
  author        = {Ding, Frances and Hardt, Moritz and Miller, John and Schmidt, Ludwig},
  title         = {Retiring {{Adult}}: {{New Datasets}} for {{Fair Machine Learning}}},
  shorttitle    = {Retiring {{Adult}}},
  publisher     = {{arXiv}},
  number        = {arXiv:2108.04884},
  year          = {2022},
  month         = jan,
  eprint        = {2108.04884},
  archiveprefix = {arxiv},
  primaryclass  = {cs, stat},
  urldate       = {2023-05-27},
  langid        = {english},
}
@inproceedings{wuBoltonDifferentialPrivacy2017,
title = {Bolt-on {{Differential Privacy}} for {{Scalable Stochastic Gradient Descent-based Analytics}}},
booktitle = {Proceedings of the 2017 {{ACM International Conference}} on {{Management}} of {{Data}}},
author = {Wu, Xi and Li, Fengan and Kumar, Arun and Chaudhuri, Kamalika and Jha, Somesh and Naughton, Jeffrey},
year = {2017},
month = may,
pages = {1307--1322},
publisher = {{ACM}},
address = {{Chicago, IL, USA}},
doi = {10.1145/3035918.3064047},
urldate = {2023-06-05},
isbn = {978-1-4503-4197-4},
langid = {english},
}
@inproceedings{papernotHyperparameterTuningRenyi2021,
title = {Hyperparameter {{Tuning}} with {{Renyi Differential Privacy}}},
author = {Papernot, Nicolas and Steinke, Thomas},
date = {2022},
year = 2022,
booktitle = {International Conference on Learning Representations},
url = {https://openreview.net/forum?id=-70L8lpp9DF},
urldate = {2023-09-03},
eventtitle = {International {{Conference}} on {{Learning Representations}}},
langid = {english},
}
@article{chaudhuriDifferentiallyPrivateEmpirical2011,
  author       = {Chaudhuri, Kamalika and Monteleoni, Claire and Sarwate, Anand D.},
  title        = {Differentially {{Private Empirical Risk Minimization}}},
  journal      = {Journal of Machine Learning Research},
  journaltitle = {Journal of Machine Learning Research},
  volume       = {12},
  number       = {29},
  pages        = {1069--1109},
  date         = {2011},
  year         = 2011,
  issn         = {1533-7928},
  url          = {http://jmlr.org/papers/v12/chaudhuri11a.html},
  urldate      = {2023-06-05},
}
@online{holohanDiffprivlibIBMDifferential2019,
  author       = {Holohan, Naoise and Braghin, Stefano and Mac Aonghusa, Pól and Levacher, Killian},
  title        = {Diffprivlib: {{The IBM Differential Privacy Library}}},
  shorttitle   = {Diffprivlib},
  organization = {arXiv},
  date         = {2019-07-04},
  year         = 2019,
  pubstate     = {preprint},
  eprint       = {1907.02444},
  eprinttype   = {arxiv},
  eprintclass  = {cs},
  doi          = {10.48550/arXiv.1907.02444},
  url          = {http://arxiv.org/abs/1907.02444},
  urldate      = {2023-09-03},
}
@book{elemamPracticalSyntheticData2020,
  author     = {El Emam, Khaled and Mosquera, Lucy and Hoptroff, Richard},
  title      = {Practical Synthetic Data Generation: {B}alancing Privacy and the Broad Availability of Data},
  shorttitle = {Practical Synthetic Data Generation},
  edition    = {First Edition},
  publisher  = {{O'Reilly Media, Inc}},
  location   = {{Sebastopol, CA}},
  date       = {2020},
  year       = 2020,
  isbn       = {978-1-4920-7274-4},
  pagetotal  = {151},
  annotation = {OCLC: on1164815296},
}
@inproceedings{renardy_privacy_red_team2023,
title = {How to Utilize Your Red Team for Privacy},
author = {Renardy, David},
booktitle = {2023 {{USENIX}} Conference on Privacy Engineering Practice and Respect ({{PEPR}} '23)},
year = 2023,
date = {2023-09},
url = {https://www.usenix.org/conference/pepr23/presentation/renardy},
eventtitle = {{{PEPR}} '23},
venue = {{Santa Clara, CA}}
}
@inproceedings{harrison2020empirical,
title={An empirical study on the perceived fairness of realistic, imperfect machine learning models},
author={Harrison, Galen and Hanson, Julia and Jacinto, Christine and Ramirez, Julio and Ur, Blase},
booktitle={Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency},
pages={392--402},
year={2020},
publisher={ACM},
address={Barcelona, Spain}
}
@inproceedings{harrison2024jupyterlab,
title={{JupyterLab} in Retrograde: {Contextual} Notifications That Highlight Fairness and Bias Issues for Data Scientists},
author={Harrison, Galen and Bryson, Kevin and Dovichi, Luca and Binion, Aleksander Herrmann and Borem, Arthur and Ur, Blase},
publisher={ACM},
booktitle={Proceedings of the {CHI} Conference on Human Factors in Computing Systems},
year={2024},
address={Honolulu, HI},
}
@article{mulliganPrivacyEssentiallyContested2016,
  author     = {Mulligan, Deirdre K. and Koopman, Colin and Doty, Nick},
  title      = {Privacy Is an Essentially Contested Concept: {A} Multi-Dimensional Analytic for Mapping Privacy},
  shorttitle = {Privacy Is an Essentially Contested Concept},
  journal    = {Philosophical Transactions of the Royal Society A: Mathematical, Physical and Engineering Sciences},
  volume     = {374},
  number     = {2083},
  pages      = {20160118},
  publisher  = {Royal Society},
  date       = {2016-12-28},
  year       = {2016},
  doi        = {10.1098/rsta.2016.0118},
  url        = {https://royalsocietypublishing.org/doi/full/10.1098/rsta.2016.0118},
  urldate    = {2022-10-22},
}
@article{nissenbaumPrivacyContextualIntegrity2004,
  author  = {Nissenbaum, Helen},
  title   = {Privacy as {{Contextual Integrity}}},
  journal = {Washington Law Review},
  volume  = {79},
  number  = {1},
  pages   = {119},
  date    = {2004-02-01},
  year    = {2004},
  url     = {https://digitalcommons.law.uw.edu/wlr/vol79/iss1/10},
}
@article{viljoenRelationalTheoryData2021,
  author       = {Viljoen, Salome},
  title        = {A {{Relational Theory}} of {{Data Governance}}},
  journal      = {Yale Law Journal},
  shortjournal = {Yale L. J.},
  volume       = {131},
  number       = {2},
  pages        = {573--654},
  date         = {2021/2022},
  year         = {2021},
  url          = {https://heinonline.org/HOL/P?h=hein.journals/ylr131&i=595},
  urldate      = {2023-03-08},
  langid       = {english},
}
@article{vanbreugelCanYouRely2023,
  author     = {van Breugel, Boris and Seedat, Nabeel and Imrie, Fergus and van der Schaar, Mihaela},
  title      = {Can {{You Rely}} on {{Your Model Evaluation}}? {{Improving Model Evaluation}} with {{Synthetic Test Data}}},
  shorttitle = {Can {{You Rely}} on {{Your Model Evaluation}}?},
  journal    = {Advances in Neural Information Processing Systems},
  volume     = {36},
  pages      = {1889--1904},
  date       = {2023-12-15},
  year       = {2023},
  url        = {https://proceedings.neurips.cc/paper_files/paper/2023/hash/05fb0f4e645cad23e0ab59d6b9901428-Abstract-Conference.html},
  urldate    = {2024-03-06},
  langid     = {english},
}
@article{sizikovaKnowledgebasedSilicoModels2023,
  author  = {Sizikova, Elena and Saharkhiz, Niloufar and Sharma, Diksha and Lago, Miguel and Sahiner, Berkman and Delfino, Jana and Badano, Aldo},
  title   = {Knowledge-Based in Silico Models and Dataset for the Comparative Evaluation of Mammography {{AI}} for a Range of Breast Characteristics, Lesion Conspicuities and Doses},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {36},
  pages   = {37401--37412},
  date    = {2023-12-15},
  year    = {2023},
  url     = {https://proceedings.neurips.cc/paper_files/paper/2023/hash/75d0956c9594f47bfb86a07bef58d4b0-Abstract-Datasets_and_Benchmarks.html},
  urldate = {2024-03-06},
  langid  = {english},
}
@article{huMembershipInferenceAttacks2022,
title = {Membership {{Inference Attacks}} on {{Machine Learning}}: {{A Survey}}},
shorttitle = {Membership {{Inference Attacks}} on {{Machine Learning}}},
author = {Hu, Hongsheng and Salcic, Zoran and Sun, Lichao and Dobbie, Gillian and Yu, Philip S. and Zhang, Xuyun},
date = {2022-09-09},
journaltitle = {ACM Computing Surveys},
journal = {ACM Computing Surveys},
shortjournal = {ACM Comput. Surv.},
volume = {54},
number = {11s},
pages = {235:1--235:37},
issn = {0360-0300},
doi = {10.1145/3523273},
url = {https://dl.acm.org/doi/10.1145/3523273},
urldate = {2024-03-01},
year = {2022},
}
@inproceedings{yeomPrivacyRiskMachine2018,
  author     = {Yeom, Samuel and Giacomelli, Irene and Fredrikson, Matt and Jha, Somesh},
  title      = {Privacy {{Risk}} in {{Machine Learning}}: {{Analyzing}} the {{Connection}} to {{Overfitting}}},
  shorttitle = {Privacy {{Risk}} in {{Machine Learning}}},
  booktitle  = {2018 {{IEEE}} 31st {{Computer Security Foundations Symposium}} ({{CSF}})},
  eventtitle = {2018 {{IEEE}} 31st {{Computer Security Foundations Symposium}} ({{CSF}})},
  pages      = {268--282},
  publisher  = {IEEE},
  location   = {Oxford},
  date       = {2018-07},
  year       = 2018,
  isbn       = {978-1-5386-6680-7},
  doi        = {10.1109/CSF.2018.00027},
  url        = {https://ieeexplore.ieee.org/document/8429311/},
  urldate    = {2024-02-09},
  langid     = {english},
}
@misc{duSystematicAssessmentTabular2024,
  author        = {Du, Yuntao and Li, Ninghui},
  title         = {Systematic {{Assessment}} of {{Tabular Data Synthesis Algorithms}}},
  publisher     = {arXiv},
  number        = {arXiv:2402.06806},
  year          = {2024},
  month         = apr,
  eprint        = {2402.06806},
  archiveprefix = {arxiv},
  primaryclass  = {cs},
  urldate       = {2024-04-16},
  langid        = {english},
}
@inproceedings{alaaHowFaithfulYour2022,
title = {How {{Faithful}} Is Your {{Synthetic Data}}? {{Sample-level Metrics}} for {{Evaluating}} and {{Auditing Generative Models}}},
shorttitle = {How {{Faithful}} Is Your {{Synthetic Data}}?},
booktitle = {Proceedings of the 39th {{International Conference}} on {{Machine Learning}}},
author = {Alaa, Ahmed and van Breugel, Boris and Saveliev, Evgeny S. and van der Schaar, Mihaela},
year = {2022},
month = jun,
pages = {290--306},
publisher = {PMLR},
issn = {2640-3498},
urldate = {2024-04-16},
langid = {english},
}
@article{chundawatUniversalMetricRobust2024,
  author    = {Chundawat, Vikram S and Tarun, Ayush K and Mandal, Murari and Lahoti, Mukund and Narang, Pratik},
  title     = {A {{Universal Metric}} for {{Robust Evaluation}} of {{Synthetic Tabular Data}}},
  journal   = {IEEE Transactions on Artificial Intelligence},
  volume    = {5},
  number    = {1},
  pages     = {300--309},
  year      = {2024},
  month     = jan,
  issn      = {2691-4581},
  doi       = {10.1109/TAI.2022.3229289},
  urldate   = {2024-04-17},
  copyright = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html},
  langid    = {english},
}
@misc{arnoldReallyUsefulSynthetic2021,
  author        = {Arnold, Christian and Neunhoeffer, Marcel},
  title         = {Really {{Useful Synthetic Data}} -- {{A Framework}} to {{Evaluate}} the {{Quality}} of {{Differentially Private Synthetic Data}}},
  publisher     = {arXiv},
  number        = {arXiv:2004.07740},
  year          = {2021},
  month         = oct,
  eprint        = {2004.07740},
  archiveprefix = {arxiv},
  primaryclass  = {cs, stat},
  doi           = {10.48550/arXiv.2004.07740},
  urldate       = {2024-04-16},
}
@article{hansenReimaginingSyntheticTabular2023,
title = {Reimagining {{Synthetic Tabular Data Generation}} through {{Data-Centric AI}}: {{A Comprehensive Benchmark}}},
shorttitle = {Reimagining {{Synthetic Tabular Data Generation}} through {{Data-Centric AI}}},
author = {Hansen, Lasse and Seedat, Nabeel and van der Schaar, Mihaela and Petrovic, Andrija},
year = {2023},
month = dec,
journal = {Advances in Neural Information Processing Systems},
volume = {36},
pages = {33781--33823},
urldate = {2024-04-17},
langid = {english},
}
@inproceedings{huSoKPrivacyPreservingData2023,
title = {{{SoK}}: {{Privacy-Preserving Data Synthesis}}},
shorttitle = {{{SoK}}},
booktitle = {2024 {{IEEE Symposium}} on {{Security}} and {{Privacy}} ({{SP}})},
author = {Hu, Yuzheng and Wu, Fan and Li, Qinbin and Long, Yunhui and Garrido, Gonzalo and Ge, Chang and Ding, Bolin and Forsyth, David and Li, Bo and Song, Dawn},
year = {2024},
month = may,
publisher = {IEEE Computer Society},
issn = {2375-1207},
doi = {10.1109/SP54263.2024.00002},
urldate = {2024-04-16},
isbn = {9798350331301},
langid = {english},
}
@article{bowenComparativeStudyDifferentially2021,
  author    = {Bowen, Claire McKay and Snoke, Joshua},
  title     = {Comparative {{Study}} of {{Differentially Private Synthetic Data Algorithms}} from the {{NIST PSCR Differential Privacy Synthetic Data Challenge}}},
  journal   = {Journal of Privacy and Confidentiality},
  volume    = {11},
  number    = {1},
  year      = {2021},
  month     = feb,
  issn      = {2575-8527},
  doi       = {10.29012/jpc.748},
  urldate   = {2024-04-16},
  copyright = {http://creativecommons.org/licenses/by-nc-nd/4.0},
  langid    = {english},
}
@misc{ganevInadequacySimilaritybasedPrivacy2023,
  author        = {Ganev, Georgi and De Cristofaro, Emiliano},
  title         = {On the {{Inadequacy}} of {{Similarity-based Privacy Metrics}}: {{Reconstruction Attacks}} against ``{{Truly Anonymous Synthetic Data}}''},
  shorttitle    = {On the {{Inadequacy}} of {{Similarity-based Privacy Metrics}}},
  publisher     = {arXiv},
  number        = {arXiv:2312.05114},
  year          = {2023},
  month         = dec,
  eprint        = {2312.05114},
  archiveprefix = {arxiv},
  primaryclass  = {cs},
  doi           = {10.48550/arXiv.2312.05114},
  urldate       = {2024-04-17},
}
@inproceedings{ganevRobinHoodMatthew2022,
title = {Robin {{Hood}} and {{Matthew Effects}}: {{Differential Privacy Has Disparate Impact}} on {{Synthetic Data}}},
shorttitle = {Robin {{Hood}} and {{Matthew Effects}}},
booktitle = {Proceedings of the 39th {{International Conference}} on {{Machine Learning}}},
author = {Ganev, Georgi and Oprisanu, Bristena and De Cristofaro, Emiliano},
year = {2022},
month = jun,
pages = {6944--6959},
publisher = {PMLR},
issn = {2640-3498},
urldate = {2023-04-19},
langid = {english},
}