Skip to content

Commit b401824

Browse files
CoordinateSource patches (#582)
* take set of mrc sizes (this was the original intention) * add custom number-parsable method * accommodate floats in coordinate files * add tests covering floats in coord files
1 parent 11c3dca commit b401824

File tree

2 files changed

+86
-8
lines changed

2 files changed

+86
-8
lines changed

src/aspire/source/coordinates.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def __init__(self, files, particle_size, max_rows):
9595
f"{self.__class__.__name__} from {os.path.dirname(self.mrc_paths[0])} contains {len(mrc_paths)} micrographs, {len(self.particles)} picked particles."
9696
)
9797
# report different mrc shapes
98-
logger.info(f"Micrographs have the following shapes: {*self.mrc_shapes,}")
98+
logger.info(f"Micrographs have the following shapes: {*set(self.mrc_shapes),}")
9999

100100
# remove particles whose boxes do not fit at given particle_size
101101
# and get number removed
@@ -325,6 +325,14 @@ def _images(self, start=0, num=np.inf, indices=None):
325325

326326
return Image(im)
327327

328+
@staticmethod
329+
def _is_number(text):
330+
"""
331+
Used in validation of coordinate files. We allow strings containing
332+
- or . to account for negative values and floats.
333+
"""
334+
return text.replace("-", "1").replace(".", "1").isdigit()
335+
328336

329337
class BoxesCoordinateSource(CoordinateSource):
330338
"""
@@ -355,7 +363,7 @@ def _extract_box_size(self, box_file):
355363
with open(box_file, "r") as box:
356364
first_line = box.readlines()[0].split()
357365
if len(first_line) >= 4:
358-
box_size = int(first_line[2]) # x size or y size works
366+
box_size = int(float(first_line[2])) # x size or y size works
359367
return box_size
360368
else:
361369
logger.error(f"Problem with coordinate file: {box_file}")
@@ -380,14 +388,14 @@ def _validate_box_file(self, box_file, global_particle_size):
380388
"flag in aspire extract-particles."
381389
)
382390

383-
if not all(p.isnumeric() for p in line.split()):
391+
if not all(self._is_number(p) for p in line.split()):
384392
logger.error(f"Problem with coordinate file: {box_file}")
385393
raise ValueError(
386394
"Coordinate file contains non-numeric coordinate values."
387395
)
388396

389397
# we can only accept square particles
390-
size_x, size_y = int(line.split()[2]), int(line.split()[3])
398+
size_x, size_y = float(line.split()[2]), float(line.split()[3])
391399
if size_x != size_y:
392400
logger.error(f"Problem with coordinate file: {box_file}")
393401
raise ValueError(
@@ -429,7 +437,7 @@ def _coords_list_from_file(self, coord_file):
429437
with open(coord_file, "r") as infile:
430438
lines = [line.split() for line in infile.readlines()]
431439
# coords are already in box format, so simply cast to int
432-
return [[int(x) for x in line] for line in lines]
440+
return [[int(float(x)) for x in line] for line in lines]
433441

434442
def _force_new_particle_size(self, new_size):
435443
"""
@@ -485,7 +493,7 @@ def _validate_centers_file(self, coord_file):
485493
"Coordinate file contains a line with less than 2 numbers."
486494
)
487495
# check that the coordinate has numeric values
488-
if not all(c.isnumeric() for c in line.split()):
496+
if not all(self._is_number(c) for c in line.split()):
489497
logger.error(f"Problem with coordinate file: {coord_file}")
490498
raise ValueError(
491499
"Coordinate file contains non-numeric coordinate values."
@@ -504,7 +512,7 @@ def _validate_starfile(self, coord_file):
504512
)
505513
# check that all values in each column are numeric
506514
if not all(
507-
all(df[col].str.isnumeric())
515+
all(df[col].apply(self._is_number))
508516
for col in ["_rlnCoordinateX", "_rlnCoordinateY"]
509517
):
510518
logger.error(f"Problem with coordinate file: {coord_file}")
@@ -531,5 +539,5 @@ def _coords_list_from_file(self, coord_file):
531539
return self._coords_list_from_star(coord_file)
532540
# otherwise we assume text file format with one coord per line:
533541
with open(coord_file, "r") as infile:
534-
lines = [line.split() for line in infile.readlines()]
542+
lines = [[float(c) for c in line.split()] for line in infile.readlines()]
535543
return [self._box_coord_from_center(line, self.particle_size) for line in lines]

tests/test_coordinate_source.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,13 @@ def setUp(self):
8585
self.createTestCoordFiles(_centers, i)
8686
self.createTestStarFiles(_centers, i)
8787

88+
# Create extra coordinate files with float
89+
# coordinates to make sure we can process these
90+
# as well
91+
self.createFloatBoxFile(centers)
92+
self.createFloatCoordFile(centers)
93+
self.createFloatStarFile(centers)
94+
8895
# create lists of files
8996
self.all_mrc_paths = sorted(glob(os.path.join(self.data_folder, "sample*.mrc")))
9097
# create file lists that will be used several times
@@ -113,6 +120,10 @@ def setUp(self):
113120
)
114121
)
115122

123+
self.float_box = os.path.join(self.data_folder, "float.box")
124+
self.float_coord = os.path.join(self.data_folder, "float.coord")
125+
self.float_star = os.path.join(self.data_folder, "float.star")
126+
116127
def tearDown(self):
117128
self.tmpdir.cleanup()
118129

@@ -166,6 +177,47 @@ def createTestStarFiles(self, centers, index):
166177
starfile = StarFile(blocks=blocks)
167178
starfile.write(star_fp)
168179

180+
def createFloatBoxFile(self, centers):
181+
# for testing float coordinates
182+
# create a box file (lower left corner and X/Y sizes)
183+
box_fp = os.path.join(self.data_folder, "float.box")
184+
# populate box file with coordinates in box format
185+
with open(box_fp, "w") as box:
186+
for center in centers:
187+
# to make a box file, we convert the centers to lower left
188+
# corners by subtracting half the particle size (here: 256)
189+
lower_left_corners = (center[0] - 128, center[1] - 128)
190+
box.write(
191+
f"{lower_left_corners[0]}.000\t{lower_left_corners[1]}.000\t256.000\t256.000\n"
192+
)
193+
194+
def createFloatCoordFile(self, centers):
195+
# for testing float coordinates
196+
# create a coord file (only particle centers listed)
197+
coord_fp = os.path.join(self.data_folder, "float.coord")
198+
# populate coord file with particle centers
199+
with open(coord_fp, "w") as coord:
200+
for center in centers:
201+
# .coord file usually contains just the centers
202+
coord.write(f"{center[0]}.000\t{center[1]}.000\n")
203+
204+
def createFloatStarFile(self, centers):
205+
# for testing float coordinates
206+
# create a star file (only particle centers listed)
207+
star_fp = os.path.join(self.data_folder, "float.star")
208+
# populate star file with particle centers
209+
x_coords = [str(center[0]) + ".000" for center in centers]
210+
y_coords = [str(center[1]) + ".000" for center in centers]
211+
blocks = OrderedDict(
212+
{
213+
"coordinates": DataFrame(
214+
{"_rlnCoordinateX": x_coords, "_rlnCoordinateY": y_coords}
215+
)
216+
}
217+
)
218+
starfile = StarFile(blocks=blocks)
219+
starfile.write(star_fp)
220+
169221
def testLoadFromBox(self):
170222
# ensure successful loading from box files
171223
BoxesCoordinateSource(self.files_box)
@@ -178,6 +230,24 @@ def testLoadFromStar(self):
178230
# ensure successful loading from particle center files (.star)
179231
CentersCoordinateSource(self.files_star, particle_size=256)
180232

233+
def testLoadFromBox_Floats(self):
234+
# ensure successful loading from box files with float coordinates
235+
BoxesCoordinateSource([(self.all_mrc_paths[0], self.float_box)])
236+
237+
def testLoadFromCenters_Floats(self):
238+
# ensure successful loading from particle center files (.coord)
239+
# with float coordinates
240+
CentersCoordinateSource(
241+
[(self.all_mrc_paths[0], self.float_coord)], particle_size=256
242+
)
243+
244+
def testLoadFromStar_Floats(self):
245+
# ensure successful loading from particle center files (.star)
246+
# with float coordinates
247+
CentersCoordinateSource(
248+
[(self.all_mrc_paths[0], self.float_star)], particle_size=256
249+
)
250+
181251
def testNonSquareParticles(self):
182252
# nonsquare box sizes must fail
183253
with self.assertRaises(ValueError):

0 commit comments

Comments
 (0)