Script lexique de mots
From Mondothèque
#!/usr/bin/env python
import string
# remove punctuation
def remove_punct(f):
    """Return the text of *f* as one lowercase string without punctuation.

    Args:
        f: An iterable of text lines, e.g. an open text file.

    Returns:
        The lines joined by single spaces, lowercased, with every character
        listed in ``string.punctuation`` deleted. Only ASCII punctuation is
        covered; typographic quotes, guillemets, etc. are left in place.
    """
    # Strip the newlines and join with a space so the last word of one line
    # does not fuse with the first word of the next.
    tokens = " ".join(line.replace("\n", "") for line in f).lower()
    # Delete all punctuation in a single pass instead of one replace() call
    # per punctuation character.
    return tokens.translate(str.maketrans("", "", string.punctuation))
# add the words of the text to a set (a collection of unique items)
def lexicon(tokens, target=None):
    """Add every word of *tokens* to a set of unique words and return it.

    Args:
        tokens: Whitespace-separated text.
        target: The set the words are added to. When omitted, the
            module-level ``wordset`` is used, so existing one-argument
            calls keep accumulating into that global set.

    Returns:
        The set the words were added to (the same object, mutated in place).
    """
    if target is None:
        target = wordset
    # split() without an argument skips the empty strings that
    # split(" ") produces for consecutive spaces (blank lines, removed
    # punctuation), so "" never ends up in the lexicon.
    target.update(tokens.split())
    return target
# sort words alphabetically & write words to file
def publish(wordset, reverse=False, out=None):
    """Write the words of *wordset*, sorted, one per line.

    Args:
        wordset: Iterable of words to write.
        reverse: When true, write the words in descending order.
        out: Writable text stream. When omitted, the module-level ``words``
            file is used, so existing one-argument calls behave as before.

    Note:
        Ordering is Python's default string ordering (by code point), so
        accented letters sort after ``z``.
    """
    if out is None:
        out = words
    # sorted() accepts any iterable, so no intermediate list is needed;
    # writelines() emits all lines in one call.
    out.writelines(word + "\n" for word in sorted(wordset, reverse=reverse))
# Define & open the input/output files. The `with` statement closes both
# files even if one of the processing steps raises.
# `words` and `wordset` stay module-level names because lexicon() and
# publish() read them as globals.
with open("1_notion.txt", "rt", encoding="utf-8") as f, \
        open("mots.txt", "wt", encoding="utf-8") as words:
    wordset = set()

    # execute functions
    tokens = remove_punct(f)
    wordset = lexicon(tokens)
    publish(wordset)