|
1 | 1 |
|
2 | | -import os |
3 | 2 | from dataclasses import dataclass |
4 | 3 | from functools import partial |
| 4 | +from urllib.parse import urljoin |
5 | 5 |
|
6 | 6 | from typing import Optional, Callable |
7 | 7 | from torchtext._download_hooks import load_state_dict_from_url |
@@ -100,19 +100,19 @@ def encoderConf(self) -> RobertaEncoderConf: |
100 | 100 |
|
101 | 101 |
|
# Input transform for the XLM-R base encoder: vocab and SentencePiece model
# are fetched from the shared text-asset bucket.
# NOTE(review): urljoin only appends the filename if _TEXT_BUCKET ends with
# "/"; otherwise it replaces the bucket's last path segment — confirm the
# constant carries a trailing slash.
_xlmr_base_transform = partial(
    get_xlmr_transform,
    vocab_path=urljoin(_TEXT_BUCKET, "xlmr.vocab.pt"),
    spm_model_path=urljoin(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"),
)

# Pre-trained XLM-R base encoder bundle: remote weights, encoder config, and
# the matching text transform.
XLMR_BASE_ENCODER = RobertaModelBundle(
    _path=urljoin(_TEXT_BUCKET, "xlmr.base.encoder.pt"),
    _encoder_conf=RobertaEncoderConf(vocab_size=250002),
    transform=_xlmr_base_transform,
)
110 | 110 |
|
# Input transform for the XLM-R large encoder; same bucket-hosted vocab and
# SentencePiece assets as the base variant.
# NOTE(review): urljoin only appends the filename if _TEXT_BUCKET ends with
# "/" — verify the constant, or the last path segment gets replaced.
_xlmr_large_transform = partial(
    get_xlmr_transform,
    vocab_path=urljoin(_TEXT_BUCKET, "xlmr.vocab.pt"),
    spm_model_path=urljoin(_TEXT_BUCKET, "xlmr.sentencepiece.bpe.model"),
)

# Pre-trained XLM-R large encoder bundle: identical vocab to the base model,
# scaled-up architecture (1024-dim embeddings, 4096 FFN, 16 heads, 24 layers).
XLMR_LARGE_ENCODER = RobertaModelBundle(
    _path=urljoin(_TEXT_BUCKET, "xlmr.large.encoder.pt"),
    _encoder_conf=RobertaEncoderConf(
        vocab_size=250002,
        embedding_dim=1024,
        ffn_dimension=4096,
        num_attention_heads=16,
        num_encoder_layers=24,
    ),
    transform=_xlmr_large_transform,
)
0 commit comments