Skip to content

rustdoc_json: Intern filenames #142945

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/librustdoc/json/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,15 @@ impl JsonRenderer<'_> {
}
}

pub(crate) trait FromClean<T> {
pub(crate) trait FromClean<T: ?Sized> {
fn from_clean(f: &T, renderer: &JsonRenderer<'_>) -> Self;
}

pub(crate) trait IntoJson<T> {
fn into_json(&self, renderer: &JsonRenderer<'_>) -> T;
}

impl<T, U> IntoJson<U> for T
impl<T: ?Sized, U> IntoJson<U> for T
where
U: FromClean<T>,
{
Expand Down Expand Up @@ -153,7 +153,7 @@ impl FromClean<clean::Span> for Option<Span> {
let hi = span.hi(renderer.sess());
let lo = span.lo(renderer.sess());
Some(Span {
filename: local_path,
filename: local_path.as_path().into_json(renderer),
begin: (lo.line, lo.col.to_usize() + 1),
end: (hi.line, hi.col.to_usize() + 1),
})
Expand All @@ -180,6 +180,20 @@ impl FromClean<Option<ty::Visibility<DefId>>> for Visibility {
}
}

impl FromClean<std::path::Path> for FilenameId {
fn from_clean(path: &std::path::Path, renderer: &JsonRenderer<'_>) -> Self {
let mut filenames = renderer.filenames.borrow_mut();

// To minimize calls to `to_path_buf` (which allocates) we call
// `get_index_of` first, which only needs `&Path` and usually succeeds.
// If it fails we do a separate insert, which requires `PathBuf`.
let idx = filenames
.get_index_of(path)
.unwrap_or_else(|| filenames.insert_full(path.to_path_buf()).0);
FilenameId(idx as u32)
}
}

impl FromClean<attrs::Deprecation> for Deprecation {
fn from_clean(deprecation: &attrs::Deprecation, _renderer: &JsonRenderer<'_>) -> Self {
let attrs::Deprecation { since, note, suggestion: _ } = deprecation;
Expand Down
21 changes: 13 additions & 8 deletions src/librustdoc/json/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::io::{BufWriter, Write, stdout};
use std::path::PathBuf;
use std::rc::Rc;

use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
use rustc_hir::def_id::{DefId, DefIdSet};
use rustc_middle::ty::TyCtxt;
use rustc_session::Session;
Expand All @@ -41,6 +41,8 @@ pub(crate) struct JsonRenderer<'tcx> {
/// A mapping of IDs that contains all local items for this crate which gets output as a top
/// level field of the JSON blob.
index: FxHashMap<types::Id, types::Item>,
// Interned filenames.
filenames: RefCell<FxIndexSet<PathBuf>>,
/// The directory where the JSON blob should be written to.
///
/// If this is `None`, the blob will be printed to `stdout` instead.
Expand Down Expand Up @@ -107,12 +109,12 @@ impl<'tcx> JsonRenderer<'tcx> {
}

fn serialize_and_write<T: Write>(
&self,
sess: &Session,
output_crate: types::Crate,
mut writer: BufWriter<T>,
path: &str,
) -> Result<(), Error> {
self.sess().time("rustdoc_json_serialize_and_write", || {
sess.time("rustdoc_json_serialize_and_write", || {
try_err!(
serde_json::ser::to_writer(&mut writer, &output_crate).map_err(|e| e.to_string()),
path
Expand Down Expand Up @@ -199,6 +201,7 @@ impl<'tcx> FormatRenderer<'tcx> for JsonRenderer<'tcx> {
index: FxHashMap::default(),
out_dir: if options.output_to_stdout { None } else { Some(options.output) },
cache: Rc::new(cache),
filenames: Default::default(),
imported_items,
id_interner: Default::default(),
},
Expand Down Expand Up @@ -318,6 +321,7 @@ impl<'tcx> FormatRenderer<'tcx> for JsonRenderer<'tcx> {
let target = target(self.tcx.sess);

debug!("Constructing Output");
let sess = self.sess();
let output_crate = types::Crate {
root: self.id_from_item_default(e.def_id().into()),
crate_version: self.cache.crate_version.clone(),
Expand All @@ -339,6 +343,7 @@ impl<'tcx> FormatRenderer<'tcx> for JsonRenderer<'tcx> {
)
})
.collect(),
filenames: self.filenames.into_inner().into_iter().collect(),
external_crates: self
.cache
.extern_locations
Expand Down Expand Up @@ -367,13 +372,14 @@ impl<'tcx> FormatRenderer<'tcx> for JsonRenderer<'tcx> {
p.push(output_crate.index.get(&output_crate.root).unwrap().name.clone().unwrap());
p.set_extension("json");

self.serialize_and_write(
Self::serialize_and_write(
sess,
output_crate,
try_err!(File::create_buffered(&p), p),
&p.display().to_string(),
)
} else {
self.serialize_and_write(output_crate, BufWriter::new(stdout().lock()), "<stdout>")
Self::serialize_and_write(sess, output_crate, BufWriter::new(stdout().lock()), "<stdout>")
}
}
}
Expand All @@ -389,16 +395,15 @@ mod size_asserts {
use super::types::*;
// tidy-alphabetical-start
static_assert_size!(AssocItemConstraint, 112);
static_assert_size!(Crate, 184);
static_assert_size!(Crate, 208);
static_assert_size!(ExternalCrate, 48);
static_assert_size!(FunctionPointer, 168);
static_assert_size!(GenericArg, 80);
static_assert_size!(GenericArgs, 104);
static_assert_size!(GenericBound, 72);
static_assert_size!(GenericParamDef, 136);
static_assert_size!(Impl, 304);
// `Item` contains a `PathBuf`, which is different sizes on different OSes.
static_assert_size!(Item, 528 + size_of::<std::path::PathBuf>());
static_assert_size!(Item, 544);
static_assert_size!(ItemSummary, 32);
static_assert_size!(PolyTrait, 64);
static_assert_size!(PreciseCapturingArg, 32);
Expand Down
34 changes: 20 additions & 14 deletions src/rustdoc-json-types/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ pub type FxHashMap<K, V> = HashMap<K, V>; // re-export for use in src/librustdoc
// will instead cause conflicts. See #94591 for more. (This paragraph and the "Latest feature" line
// are deliberately not in a doc comment, because they need not be in public docs.)
//
// Latest feature: Pretty printing of must_use attributes changed
pub const FORMAT_VERSION: u32 = 52;
// Latest feature: Intern filenames
pub const FORMAT_VERSION: u32 = 53;

/// The root of the emitted JSON blob.
///
Expand All @@ -58,6 +58,8 @@ pub struct Crate {
pub index: HashMap<Id, Item>,
/// Maps IDs to fully qualified paths and other info helpful for generating links.
pub paths: HashMap<Id, ItemSummary>,
/// Interned filenames. `FilenameId`s index into this.
pub filenames: Vec<PathBuf>,
/// Maps `crate_id` of items to a crate name and html_root_url if it exists.
pub external_crates: HashMap<u32, ExternalCrate>,
/// Information about the target for which this documentation was generated
Expand Down Expand Up @@ -205,8 +207,9 @@ pub struct Item {
/// A range of source code.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Span {
/// The path to the source file for this span relative to the path `rustdoc` was invoked with.
pub filename: PathBuf,
/// ID of the path to the source file for this span relative to the path `rustdoc` was invoked
/// with.
pub filename: FilenameId,
/// One indexed Line and Column of the first character of the `Span`.
pub begin: (usize, usize),
/// One indexed Line and Column of the last character of the `Span`.
Expand Down Expand Up @@ -385,21 +388,24 @@ pub enum AssocItemConstraintKind {
Constraint(Vec<GenericBound>),
}

/// An opaque identifier for an item.
/// An opaque identifier for an item. The integer within indexes into
/// [`Crate::index`] to resolve it to an [`Item`], or into [`Crate::paths`]
/// to resolve it to an [`ItemSummary`].
///
/// It can be used to lookup in [`Crate::index`] or [`Crate::paths`] to resolve it
/// to an [`Item`].
///
/// Id's are only valid within a single JSON blob. They cannot be used to
/// resolve references between the JSON output's for different crates.
///
/// Rustdoc makes no guarantees about the inner value of Id's. Applications
/// should treat them as opaque keys to lookup items, and avoid attempting
/// to parse them, or otherwise depend on any implementation details.
Comment on lines -396 to -398
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth keeping this portion of the comment?

On one hand, this comment used to be a lot more useful when the ID was of the form 2:1234 (more or less) where 2 was the crate ID and the rest could be (ab)used as a DefId inside that crate with better than 50-50 odds of working out. On the other hand, the ID values may change representation again in the future, and this is a solid future-proofing warning.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for explaining how it used to be different, now I understand how the comment came to be. It used to make sense, but no more. The id is just an index into another part of the JSON. That's not an "opaque key" in any sense. And the idea of parsing an integer is silly.

As for future-proofing: sure, this could change, but so could literally anything else in the representation. So it doesn't need explicit mention.

/// `Id`s are only valid within a single JSON blob. They cannot be used to
/// resolve references between the JSON output for different crates.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
// FIXME(aDotInTheVoid): Consider making this non-public in rustdoc-types.
pub struct Id(pub u32);

/// An identifier for a filename. The integer within indexes into
/// [`Crate::filenames`] to resolve to the actual filename.
///
/// `FilenameId`s are only valid within a single JSON blob. They cannot be
/// used to resolve references between the JSON output for different crates.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct FilenameId(pub u32);

/// The fundamental kind of an item. Unlike [`ItemEnum`], this does not carry any additional info.
///
/// Part of [`ItemSummary`].
Expand Down