% CHI '26 paper, cleaned up from the ACM Digital Library export.
% NOTE(review): first author's surname is taken to be Scheuerman, with Klaus as a
% middle name -- confirm against the publisher's page before relying on sort order.
@inproceedings{10.1145/3772318.3790364,
  author    = {Scheuerman, Morgan Klaus and Hutiri, Wiebke and Rahmattalabi, Aida and Matthews, Victoria and Xiang, Alice and Andrews, Jerone},
  title     = {Treading the Transparency Tightrope: A Taxonomy of Risks and Benefits of Foundation Model Data Transparency for Transparency Advocates},
  booktitle = {Proceedings of the 2026 {CHI} Conference on Human Factors in Computing Systems},
  series    = {{CHI} '26},
  year      = {2026},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  articleno = {1554},
  numpages  = {29},
  isbn      = {9798400722783},
  doi       = {10.1145/3772318.3790364},
  url       = {https://doi.org/10.1145/3772318.3790364},
  keywords  = {Foundation models, data transparency, datasets, generative AI, privacy, data provenance, open-source, technology companies},
  abstract  = {Data powering AI is often opaque. Researchers, NGOs, and law and policy leaders have called for greater transparency about how data is used for training, fine-tuning, and evaluation. While data transparency is often championed as crucial, what it concretely enables is largely implicit. Similarly, the concerns developers seem to have about transparency go unstated. This lack of clarity has led some researchers to critique transparency demands as disconnected from the actual benefits---or risks---to specific stakeholders. We analyze documentation from four stakeholder groups to create a taxonomy of the risks and benefits of dataset transparency. Data transparency is perceived as either a risk or a benefit given a stakeholder's position, rather than wholesale. We also propose data availability and data documentation as two lenses through which to consider transparency. We discuss how best to strategically promote situational data transparency that takes into account the relationship between stakeholder position, transparency modality, and benefits/risks.},
}