Difference between revisions of "Scripts keyword"

Latest revision as of 06:51, 28 October 2015

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import sys
import time
from random import *

import nltk

# VARIABLES

# Timestamp of this run (day-month-year, French "à", then the time);
# it is appended to the end of the generated text.
now = time.strftime("%d-%m-%Y à %H:%M:%S")
# Sentences picked by select_sentences(); that function appends to this list.
selected_text = []

# FUNCTIONS

# Check if a sentence contains a keyword; if so, generate fake paragraphs with these sentences

def split_sentences():
    """Read "4_inventions_a_faire.txt" and return its text as sentences.

    The whole file is read at once and segmented with the NLTK tokenizer
    loaded from 'tokenizers/punkt/english.pickle'.

    NOTE(review): the English model is loaded although the input file name
    is French -- confirm this is the intended tokenizer.

    Returns the list produced by the tokenizer's ``tokenize`` call.
    """
    with open("4_inventions_a_faire.txt") as source:
        raw_text = source.read()
    # Sentence segmentation is delegated to nltk.
    punkt = nltk.data.load('tokenizers/punkt/english.pickle')
    return punkt.tokenize(raw_text)

def select_sentences(sentences):
    """Collect the sentences that contain at least one keyword.

    Each sentence is split on single spaces and its words are compared
    against the module-level ``keywords`` list. A matching sentence is
    appended exactly once to the module-level ``selected_text`` list,
    followed by a space. With a 1-in-6 chance two newlines are appended as
    well, which breaks the output into paragraphs of random length.

    Args:
        sentences: iterable of sentence strings.

    Returns:
        The module-level ``selected_text`` list, extended in place.
    """
    for sentence in sentences:
        # any() stops at the first hit, so a sentence holding several
        # keywords is selected once rather than once per occurrence.
        if not any(word in keywords for word in sentence.split(" ")):
            continue
        selected = sentence + ' '
        # The former counter was reset to 0 for every word, so the check
        # always compared 1 against randint(1, 6); keep that 1-in-6 chance
        # without the dead variable.
        if randint(1, 6) == 1:
            selected += "\n\n"
        selected_text.append(selected)
    return selected_text

# Write to file

def writetofile(content):
    """Append ``content`` to "sentences.txt".

    Writing is best effort: an IOError is reported on stderr instead of
    being raised, so a failed write does not interrupt the script.

    Args:
        content: the text to append.
    """
    try:
        # The with-statement closes the file even when write() fails.
        with open("sentences.txt", "a") as logfile:
            logfile.write(content)
    except IOError as err:
        sys.stderr.write("Could not write to sentences.txt: %s\n" % err)

# keywords

# Hand-picked keyword list.
keywords = ['machine', 'machines']

# OR: collect every word ending in 'ing' from "lelivre_extrait.txt".
# As written, this second assignment replaces the hand-picked list above;
# remove whichever alternative is not wanted.
keywords = []
# The with-statement closes the file once all lines have been scanned.
with open("lelivre_extrait.txt") as extract:
    for line in extract:
        for word in line.split():
            if word.endswith('ing'):
                keywords.append(word)

# SCRIPT

# split text into sentences

sentences = split_sentences()

# select sentences based on keywords

selected_text = select_sentences(sentences)

# write to new file: a three-line header, the selected sentences, then a
# footer carrying the generation timestamp

writetofile('Traîté de la Documentation\n')
writetofile('Paul Otlet & Henri Lafontaine\n')
writetofile('Sélection à base de ' + ', '.join(keywords) + '\n\n\n\n')
for sentence in selected_text:
    writetofile(sentence)

writetofile('\n\n\nCe texte a été généré le '+now + '.')

Difference between revisions of "Scripts keyword"

From Mondothèque

Latest revision as of 06:51, 28 October 2015

What links here