@@ -40,15 +40,15 @@ def YelpReviewFull(root: str, split: Union[Tuple[str], str]):
4040
4141 url_dp = IterableWrapper ([URL ])
4242
43- cache_dp = url_dp .on_disk_cache (
43+ cache_compressed_dp = url_dp .on_disk_cache (
4444 filepath_fn = lambda x : os .path .join (root , _PATH ),
4545 hash_dict = {os .path .join (root , _PATH ): MD5 }, hash_type = "md5"
4646 )
47- cache_dp = GDriveReader (cache_dp ).end_caching (mode = "wb" , same_filepath_fn = True )
48- cache_dp = FileOpener (cache_dp , mode = "b" )
49-
50- cache_dp = cache_dp .on_disk_cache (filepath_fn = lambda x : os .path .join (root , _EXTRACTED_FILES [split ]))
51- cache_dp = cache_dp .read_from_tar ().filter (lambda x : _EXTRACTED_FILES [split ] in x [0 ])
52- cache_dp = cache_dp .end_caching (mode = "wb" , same_filepath_fn = True )
53- cache_dp = FileOpener (cache_dp , mode = "b" )
54- return cache_dp .parse_csv ().map (fn = lambda t : (int (t [0 ]), " " .join (t [1 :])))
47+ cache_compressed_dp = GDriveReader (cache_compressed_dp ).end_caching (mode = "wb" , same_filepath_fn = True )
48+ cache_compressed_dp = FileOpener (cache_compressed_dp , mode = "b" )
49+
50+ cache_decompressed_dp = cache_compressed_dp .on_disk_cache (filepath_fn = lambda x : os .path .join (root , _EXTRACTED_FILES [split ]))
51+ cache_decompressed_dp = cache_decompressed_dp .read_from_tar ().filter (lambda x : _EXTRACTED_FILES [split ] in x [0 ])
52+ cache_decompressed_dp = cache_decompressed_dp .end_caching (mode = "wb" , same_filepath_fn = True )
53+ data_dp = FileOpener (cache_decompressed_dp , mode = "b" )
54+ return data_dp .parse_csv ().map (fn = lambda t : (int (t [0 ]), " " .join (t [1 :])))
0 commit comments