diff --git a/tensorboard/compat/proto/attr_value.proto b/tensorboard/compat/proto/attr_value.proto index 6ba541064c..471535720d 100644 --- a/tensorboard/compat/proto/attr_value.proto +++ b/tensorboard/compat/proto/attr_value.proto @@ -27,7 +27,7 @@ message AttrValue { repeated TensorProto tensor = 8; // "list(tensor)" repeated NameAttrList func = 9; // "list(attr)" } - // LINT.ThenChange(https://www.tensorflow.org/code/tensorflow/c/c_api.cc) + // LINT.ThenChange(//tensorflow/c/c_api.cc) oneof value { bytes s = 2; // "string" diff --git a/tensorboard/compat/proto/config.proto b/tensorboard/compat/proto/config.proto index 55cdf197bf..7a3d102954 100644 --- a/tensorboard/compat/proto/config.proto +++ b/tensorboard/compat/proto/config.proto @@ -212,6 +212,11 @@ message GPUOptions { // When true, use CUDA cudaMallocAsync API instead of TF gpu allocator. bool use_cuda_malloc_async = 11; + + // By default, BFCAllocator may sleep when it runs out of memory, in the + // hopes that another thread will free up memory in the meantime. Setting + // this to true disables the sleep; instead we'll OOM immediately. + bool disallow_retry_on_allocation_failure = 12; } // Everything inside experimental is subject to change and is not subject diff --git a/tensorboard/compat/proto/coordination_config.proto b/tensorboard/compat/proto/coordination_config.proto index 30aa1a3d34..16fdc7804c 100644 --- a/tensorboard/compat/proto/coordination_config.proto +++ b/tensorboard/compat/proto/coordination_config.proto @@ -22,11 +22,25 @@ message CoordinationServiceConfig { // Maximum wait time for all members in the cluster to be registered. int64 cluster_register_timeout_in_ms = 4; - // Heartbeat timeout, if a worker does not record heartbeat in this time + // Heartbeat timeout, if a task does not record heartbeat in this time // window, it will be considered disconnected. 
+ // Note: This is also used as a grace period to accept any heartbeats after + // the agent has disconnected, to account for the lag time between the service + // recording the state change and the agent stopping heartbeats. int64 heartbeat_timeout_in_ms = 5; // The list of jobs that partipate in the coordination service. If empty, all // jobs will be included in the coordination service by default. repeated string coordinated_jobs = 6; + + // Denotes how long to wait for all coordination agents to reach the barriers + // (after the first shutdown request) before disconnecting together. If + // set to 0, no barrier is imposed upon shutdown and each worker can + // disconnect individually. + int64 shutdown_barrier_timeout_in_ms = 7; + + // If set, agents do not make an explicit Shutdown() call. Service will only + // find out about the disconnected agent via stale heartbeats. Used for + // testing. + bool agent_destruction_without_shutdown = 8; } diff --git a/tensorboard/compat/proto/full_type.proto b/tensorboard/compat/proto/full_type.proto index dc91bbb29a..11245560e4 100644 --- a/tensorboard/compat/proto/full_type.proto +++ b/tensorboard/compat/proto/full_type.proto @@ -8,6 +8,7 @@ option java_multiple_files = true; option java_package = "org.tensorflow.framework"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/full_type_go_proto"; +// LINT.IfChange // Experimental. Represents the complete type information of a TensorFlow value. enum FullTypeId { // The default represents an uninitialized values. @@ -274,3 +275,4 @@ message FullTypeDef { // TODO(mdan): list/tensor, map? Need to reconcile with TFT_RECORD, etc. 
} } +// LINT.ThenChange(../ir/types/attributes.td) diff --git a/tensorboard/compat/proto/rewriter_config.proto b/tensorboard/compat/proto/rewriter_config.proto index ccf29cadda..0d7c600982 100644 --- a/tensorboard/compat/proto/rewriter_config.proto +++ b/tensorboard/compat/proto/rewriter_config.proto @@ -37,6 +37,13 @@ message RewriterConfig { // may break. For example, assume the shape of a placeholder matches its // actual feed. AGGRESSIVE = 3; + // Run MLIR pass if there's one implemented in TFG, do nothing otherwise. + // I.e., if there's no corresponding TFG pass, it's an OFF. This is supposed + // to be mapped with `ON` and there's no `AGGRESSIVE` in MLIR pass now. + EXPERIMENTAL_MLIR = 4; + // Run both MLIR and Grappler passes consecutively and MLIR pass will come + // first. + EXPERIMENTAL_BOTH = 5; } // Enum for layout conversion between NCHW and NHWC on CPU. Default is OFF. diff --git a/tensorboard/compat/proto/saved_object_graph.proto b/tensorboard/compat/proto/saved_object_graph.proto index 185049ad07..dd230be434 100644 --- a/tensorboard/compat/proto/saved_object_graph.proto +++ b/tensorboard/compat/proto/saved_object_graph.proto @@ -38,7 +38,8 @@ message SavedObject { // Objects which this object depends on: named edges in the dependency // graph. // - // Note: currently only valid if kind == "user_object" or "resource". + // Note: All kinds of SavedObject may have children, except + // "constant" and "captured_tensor". repeated TrackableObjectGraph.TrackableObject.ObjectReference children = 1; // Ordered list of dependencies that must be loaded before this object. 
diff --git a/tensorboard/compat/proto/trackable_object_graph.proto b/tensorboard/compat/proto/trackable_object_graph.proto index 097d0d7920..499fac1c89 100644 --- a/tensorboard/compat/proto/trackable_object_graph.proto +++ b/tensorboard/compat/proto/trackable_object_graph.proto @@ -33,10 +33,10 @@ message TrackableObjectGraph { string full_name = 2; // The generated name of the Tensor in the checkpoint. string checkpoint_key = 3; - // Whether checkpoints should be considered as matching even without this - // value restored. Used for non-critical values which don't affect the - // TensorFlow graph, such as layer configurations. - bool optional_restore = 4; + // Deprecated bool field for optional restore. This field has never been + // set to True. + reserved "optional_restore"; + reserved 4; } message SlotVariableReference {