@@ -217,6 +217,83 @@ def decode_single(self, rel_codes: Tensor, boxes: Tensor) -> Tensor:
         return pred_boxes
 
 
+class BoxLinearCoder:
+    """
+    The linear box-to-box transform defined in FCOS. The transformation is parameterized
+    by the distance from the center of (square) src box to 4 edges of the target box.
+    """
+
+    def __init__(self, normalize_by_size: bool = True) -> None:
+        """
+        Args:
+            normalize_by_size (bool): normalize deltas by the size of src (anchor) boxes.
+        """
+        self.normalize_by_size = normalize_by_size
+
+    def encode_single(self, reference_boxes: Tensor, proposals: Tensor) -> Tensor:
+        """
+        Encode a set of proposals with respect to some reference boxes
+
+        Args:
+            reference_boxes (Tensor): reference boxes
+            proposals (Tensor): boxes to be encoded
+
+        Returns:
+            Tensor: the encoded relative box offsets that can be used to
+            decode the boxes.
+        """
+        # get the center of reference_boxes
+        reference_boxes_ctr_x = 0.5 * (reference_boxes[:, 0] + reference_boxes[:, 2])
+        reference_boxes_ctr_y = 0.5 * (reference_boxes[:, 1] + reference_boxes[:, 3])
+
+        # get box regression transformation deltas
+        target_l = reference_boxes_ctr_x - proposals[:, 0]
+        target_t = reference_boxes_ctr_y - proposals[:, 1]
+        target_r = proposals[:, 2] - reference_boxes_ctr_x
+        target_b = proposals[:, 3] - reference_boxes_ctr_y
+
+        targets = torch.stack((target_l, target_t, target_r, target_b), dim=1)
+        if self.normalize_by_size:
+            reference_boxes_w = reference_boxes[:, 2] - reference_boxes[:, 0]
+            reference_boxes_h = reference_boxes[:, 3] - reference_boxes[:, 1]
+            reference_boxes_size = torch.stack(
+                (reference_boxes_w, reference_boxes_h, reference_boxes_w, reference_boxes_h), dim=1
+            )
+            targets = targets / reference_boxes_size
+
+        return targets
+
+    def decode_single(self, rel_codes: Tensor, boxes: Tensor) -> Tensor:
+        """
+        From a set of original boxes and encoded relative box offsets,
+        get the decoded boxes.
+
+        Args:
+            rel_codes (Tensor): encoded boxes
+            boxes (Tensor): reference boxes.
+
+        Returns:
+            Tensor: the predicted boxes with the encoded relative box offsets.
+        """
+
+        boxes = boxes.to(rel_codes.dtype)
+
+        ctr_x = 0.5 * (boxes[:, 0] + boxes[:, 2])
+        ctr_y = 0.5 * (boxes[:, 1] + boxes[:, 3])
+        if self.normalize_by_size:
+            boxes_w = boxes[:, 2] - boxes[:, 0]
+            boxes_h = boxes[:, 3] - boxes[:, 1]
+            boxes_size = torch.stack((boxes_w, boxes_h, boxes_w, boxes_h), dim=1)
+            rel_codes = rel_codes * boxes_size
+
+        pred_boxes1 = ctr_x - rel_codes[:, 0]
+        pred_boxes2 = ctr_y - rel_codes[:, 1]
+        pred_boxes3 = ctr_x + rel_codes[:, 2]
+        pred_boxes4 = ctr_y + rel_codes[:, 3]
+        pred_boxes = torch.stack((pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4), dim=1)
+        return pred_boxes
+
+
 class Matcher:
     """
     This class assigns to each predicted "element" (e.g., a box) a ground-truth
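Below the hunk, for orientation only: a minimal usage sketch (not part of the commit) of how BoxLinearCoder round-trips boxes. The first argument to encode_single supplies the centers and, when normalize_by_size=True, the normalization sizes, while the second argument is the set of boxes being encoded as (left, top, right, bottom) distances. The import path and the example tensors are assumptions: the class is presumed to sit next to BoxCoder and Matcher in torchvision/models/detection/_utils.py, and the anchor/ground-truth values are made up.

# Hypothetical round-trip sketch for BoxLinearCoder (not taken from the commit).
# Assumed import path: the class is expected to live alongside BoxCoder/Matcher.
import torch
from torchvision.models.detection._utils import BoxLinearCoder

coder = BoxLinearCoder(normalize_by_size=True)

# Made-up anchors and matched ground-truth boxes in (x1, y1, x2, y2) format.
anchors = torch.tensor([[10.0, 10.0, 50.0, 50.0],
                        [20.0, 30.0, 60.0, 90.0]])
gt_boxes = torch.tensor([[12.0, 8.0, 48.0, 55.0],
                         [25.0, 28.0, 70.0, 95.0]])

# encode_single uses the anchors' centers (and sizes, for normalization) and
# encodes the ground-truth boxes as (l, t, r, b) distances from those centers.
rel_codes = coder.encode_single(anchors, gt_boxes)

# Decoding the offsets against the same anchors recovers the ground-truth boxes.
decoded = coder.decode_single(rel_codes, anchors)
assert torch.allclose(decoded, gt_boxes, atol=1e-5)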