Fix up docstrings

Krzysztof Chalupka · facebook-github-bot · commit 6653f4400b0f · 2022-08-23T14:58:49.000-07:00
Summary: One of the docstrings is a disaster (see https://pytorch3d.readthedocs.io/en/latest/modules/ops.html). Also includes some minor fixes I encountered when browsing the code. Reviewed By: bottler Differential Revision: D38581595 fbshipit-source-id: 3b6ca97788af380a44df9144a6a4cac782c6eab8
diff --git a/pytorch3d/ops/cameras_alignment.py b/pytorch3d/ops/cameras_alignment.py
@@ -39,31 +39,32 @@ def corresponding_cameras_alignment(
     such that the following holds:
 
         Under the change of coordinates using a similarity transform
-        (R_A, T_A, s_A) a 3D point X' is mapped to X with:
-            ```
+        (R_A, T_A, s_A) a 3D point X' is mapped to X with: ::
+
             X = (X' R_A + T_A) / s_A
-            ```
-        Then, for all cameras `i`, we assume that the following holds:
-            ```
+
+        Then, for all cameras `i`, we assume that the following holds: ::
+
             X R_i + T_i = s' (X' R_i' + T_i'),
-            ```
+
         i.e. an adjusted point X' is mapped by a camera (R_i', T_i')
         to the same point as imaged from camera (R_i, T_i) after resolving
         the scale ambiguity with a global scalar factor s'.
 
-        Substituting for X above gives rise to the following:
-            ```
+        Substituting for X above gives rise to the following: ::
+
             (X' R_A + T_A) / s_A R_i + T_i = s' (X' R_i' + T_i')       // · s_A
             (X' R_A + T_A) R_i + T_i s_A = (s' s_A) (X' R_i' + T_i')
             s' := 1 / s_A  # without loss of generality
             (X' R_A + T_A) R_i + T_i s_A = X' R_i' + T_i'
             X' R_A R_i + T_A R_i + T_i s_A = X' R_i' + T_i'
                ^^^^^^^   ^^^^^^^^^^^^^^^^^
                ~= R_i'        ~= T_i'
-            ```
+
         i.e. after estimating R_A, T_A, s_A, the aligned source cameras have
-        extrinsics:
-            `cameras_src_align = (R_A R_i, T_A R_i + T_i s_A) ~= (R_i', T_i')`
+        extrinsics: ::
+
+            cameras_src_align = (R_A R_i, T_A R_i + T_i s_A) ~= (R_i', T_i')
 
     We support two ways `R_A, T_A, s_A` can be estimated:
         1) `mode=='centers'`
@@ -73,12 +74,12 @@ def corresponding_cameras_alignment(
 
         2) `mode=='extrinsics'`
             Defines the alignment problem as a system
-            of the following equations:
-                ```
+            of the following equations: ::
+
                 for all i:
                 [ R_A   0 ] x [ R_i         0 ] = [ R_i' 0 ]
                 [ T_A^T 1 ]   [ (s_A T_i^T) 1 ]   [ T_i' 1 ]
-                ```
+
             `R_A, T_A` and `s_A` are then obtained by solving the
             system in the least squares sense.
 
diff --git a/pytorch3d/renderer/cameras.py b/pytorch3d/renderer/cameras.py
@@ -36,15 +36,15 @@ class CamerasBase(TensorProperties):
 
     For cameras, there are four different coordinate systems (or spaces)
     - World coordinate system: This is the system the object lives - the world.
-    - Camera view coordinate system: This is the system that has its origin on the camera
-        and the and the Z-axis perpendicular to the image plane.
+    - Camera view coordinate system: This is the system that has its origin on
+        the camera and the Z-axis perpendicular to the image plane.
         In PyTorch3D, we assume that +X points left, and +Y points up and
         +Z points out from the image plane.
         The transformation from world --> view happens after applying a rotation (R)
         and translation (T)
     - NDC coordinate system: This is the normalized coordinate system that confines
-        in a volume the rendered part of the object or scene. Also known as view volume.
-        For square images, given the PyTorch3D convention, (+1, +1, znear)
+        the rendered part of the object or scene to a volume, also known as the
+        view volume. For square images, given the PyTorch3D convention, (+1, +1, znear)
         is the top left near corner, and (-1, -1, zfar) is the bottom right far
         corner of the volume.
         The transformation from view --> NDC happens after applying the camera
@@ -54,10 +54,9 @@ class CamerasBase(TensorProperties):
     - Screen coordinate system: This is another representation of the view volume with
         the XY coordinates defined in image space instead of a normalized space.
 
-    A better illustration of the coordinate systems can be found in
-    pytorch3d/docs/notes/cameras.md.
+    An illustration of the coordinate systems can be found in pytorch3d/docs/notes/cameras.md.
 
-    It defines methods that are common to all camera models:
+    CamerasBase defines methods that are common to all camera models:
         - `get_camera_center` that returns the optical center of the camera in
             world coordinates
         - `get_world_to_view_transform` which returns a 3D transform from
@@ -167,8 +166,8 @@ def get_camera_center(self, **kwargs) -> torch.Tensor:
                 as keyword arguments to override the default values
                 set in __init__.
 
-        Setting T here will update the values set in init as this
-        value may be needed later on in the rendering pipeline e.g. for
+        Setting R or T here will update the values set in init as these
+        values may be needed later on in the rendering pipeline e.g. for
         lighting calculations.
 
         Returns:
@@ -237,8 +236,9 @@ def transform_points(
         self, points, eps: Optional[float] = None, **kwargs
     ) -> torch.Tensor:
         """
-        Transform input points from world to camera space with the
-        projection matrix defined by the camera.
+        Transform input points from world to camera space.
+        If camera is defined in NDC space, the projected points are in NDC space.
+        If camera is defined in screen space, the projected points are in screen space.
 
         For `CamerasBase.transform_points`, setting `eps > 0`
         stabilizes gradients since it leads to avoiding division
@@ -492,7 +492,7 @@ class FoVPerspectiveCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
     projection matrices by specifying the field of view.
-    The definition of the parameters follow the OpenGL perspective camera.
+    The definitions of the parameters follow the OpenGL perspective camera.
 
     The extrinsics of the camera (R and T matrices) can also be set in the
     initializer or passed in to `get_full_projection_transform` to get
@@ -780,7 +780,7 @@ class FoVOrthographicCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
     projection matrices by specifying the field of view.
-    The definition of the parameters follow the OpenGL orthographic camera.
+    The definitions of the parameters follow the OpenGL orthographic camera.
     """
 
     # For __getitem__
diff --git a/pytorch3d/transforms/transform3d.py b/pytorch3d/transforms/transform3d.py
@@ -165,7 +165,7 @@ def __init__(
                 raise ValueError('"matrix" has to be a 2- or a 3-dimensional tensor.')
             if matrix.shape[-2] != 4 or matrix.shape[-1] != 4:
                 raise ValueError(
-                    '"matrix" has to be a tensor of shape (minibatch, 4, 4)'
+                    '"matrix" has to be a tensor of shape (minibatch, 4, 4) or (4, 4).'
                 )
             # set dtype and device from matrix
             dtype = matrix.dtype

Original file line number	Diff line number	Diff line change
`@@ -165,7 +165,7 @@ def __init__(`
`165`	`165`	`raise ValueError('"matrix" has to be a 2- or a 3-dimensional tensor.')`
`166`	`166`	`if matrix.shape[-2] != 4 or matrix.shape[-1] != 4:`
`167`	`167`	`raise ValueError(`
`168`		`- '"matrix" has to be a tensor of shape (minibatch, 4, 4)'`
	`168`	`+ '"matrix" has to be a tensor of shape (minibatch, 4, 4) or (4, 4).'`
`169`	`169`	`)`
`170`	`170`	`# set dtype and device from matrix`
`171`	`171`	`dtype = matrix.dtype`