diff --git a/retrieval/contriever/LB2mC.py b/retrieval/contriever/LB2mC.py index fa866b763f51051795c21d9e3b61b008235f7f08..a39b882526545b5646f034a3e2981cfc7782ec13 100644 --- a/retrieval/contriever/LB2mC.py +++ b/retrieval/contriever/LB2mC.py @@ -37,7 +37,7 @@ def process_jsonl_file(input_file, output_folder, chunk_size=100, filename='Unkn for i, chunk in enumerate(chunks): output_datum = { 'id': data['_id'] + '_' + str(i), - 'text': chunk, + 'text': chunk.strip(), 'title': '' } output_data.append(output_datum)