diff --git a/run_wrap.py b/run_wrap.py
index a2ef54e..e2ddc39 100644
--- a/run_wrap.py
+++ b/run_wrap.py
@@ -27,8 +27,5 @@ for line in proc.stdout:
     last = dec
 t1 = time.time()
 
-print('\n---\n')
-print(enc.decode(tokens))
-
-print(f"achieved tok/s: {len(tokens) / (t1 - t0)}")
+print(f"\nachieved tok/s: {len(tokens) / (t1 - t0)}")
 proc.wait()
diff --git a/tokenizer.py b/tokenizer.py
index 5466454..765b30d 100644
--- a/tokenizer.py
+++ b/tokenizer.py
@@ -15,16 +15,14 @@ class Tokenizer:
         model_path = TOKENIZER_MODEL
         assert os.path.isfile(model_path), model_path
         self.sp_model = SentencePieceProcessor(model_file=model_path)
-        print(f"Loaded SentencePiece model from {model_path}")
+        #print(f"Loaded SentencePiece model from {model_path}")
 
         # BOS / EOS token IDs
         self.n_words: int = self.sp_model.vocab_size()
         self.bos_id: int = self.sp_model.bos_id()
         self.eos_id: int = self.sp_model.eos_id()
         self.pad_id: int = self.sp_model.pad_id()
-        print(
-            f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}"
-        )
+        #print(f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}")
         assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
 
     def encode(self, s: str, bos: bool, eos: bool) -> List[int]: