def batch(corpora, iput):
    import pyodbc, nltk, array, re, itertools, file_ip_console
    from operator import itemgetter
    from nltk.tokenize import sent_tokenize, word_tokenize
    from nltk.corpus import stopwords, treebank, conll2007, brown, reuters, wordnet
    from nltk.tag import UnigramTagger, AffixTagger, SequentialBackoffTagger, tnt
    from nltk.probability import FreqDist
    #########################################
    #          Connect to database          #
    #########################################
    cnxn = pyodbc.connect('Driver={MySQL ODBC 5.1 Driver};Server=127.0.0.1;Port=3306;Database=information_schema;User=root; Password=1234;Option=3;')
    cursor = cnxn.cursor()
    cursor.execute("use collegedatabase;")
    cursor.execute("select * from tblalldata;")
    cnxn.commit()
    def isAcceptableChar(character):
        # Keep only characters that are not punctuation or digits.
        return character not in "~@#$%^&*()_+`1234567890-={}|<>[]\',/\""
    # Choose the training corpus and train the taggers.
    if corpora == 1:
        train_sents = treebank.tagged_sents()[:6000000]
    elif corpora == 2:
        train_sents = conll2007.tagged_sents()[:6000000]
    elif corpora == 3:
        train_sents = brown.tagged_sents(categories='news')[:6000000]
    tagger = UnigramTagger(train_sents)
    tnt_tagger = tnt.TnT()
    tnt_tagger.train(train_sents)
    affix_tagger = AffixTagger(train_sents)
    # Working lists: row keys (s), raw text (ji), cleaned text, tokens,
    # POS tags, and the positions of the proper nouns that are found.
    s = []
    ji = []
    text = []
    tokens = []
    pos_tagging = []
    length_of_pos_tagging = []
    nnp_array = []
    nnp_array_index_coloumn = []
    nnp_array_index_row = []
    uniques = []
    c = []
    add1 = 0
    add2 = 0
    regex = re.compile(r"\w+\.")
    english_stops = set(stopwords.words('english'))
    # Read the key and the raw text of every row into parallel lists.
    for entry in cursor:
        s.append(entry.propernoun_SRNO)
        ji.append(entry.regular_text)
    if iput == 1:
        for i in range(0, len(s)):
            # Strip unwanted characters, tokenize, drop stopwords, then tag.
            # (filter() on a string returns the cleaned string under Python 2.)
            text.append(filter(isAcceptableChar, ji[i]))
            tokens.append([word for word in nltk.regexp_tokenize(text[i], r"\w+")
                           if word not in english_stops])
            pos_tagging.append(tagger.tag(tokens[i]))
            length_of_pos_tagging.append(len(pos_tagging[i]))
            # Remember every token tagged as a proper noun (or left untagged).
            for j in range(0, length_of_pos_tagging[i]):
                if pos_tagging[i][j][1] in ("NNP", "NP-TL", "NP$", "NP", None, "Unk"):
                    nnp_array.append(pos_tagging[i][j][0])
                    nnp_array_index_coloumn.append(i)
                    nnp_array_index_row.append(j)
        # Write the proper nouns of each row back into tblauto_tagged, starting
        # from the index recorded by the previous run.
        starting_index = file_ip_console.sql_var_index
        ending_index = starting_index + len(s)
        for j in range(starting_index, ending_index):
            for i in range(0, len(s)):
                c.append(nnp_array_index_coloumn.count(i))
                add1 = add1 + c[i - 1]
                add2 = add1 + c[i]
            xx = str(iput), str(corpora)
            cursor.execute("update tblauto_tagged set tagger_train = ?, propernoun = ?, propernoun_ID = ? where propernoun_SRNO = ?",
                           str(xx), str(nnp_array[add1:add2]), str(c[i]), j)
            for item in nnp_array_index_coloumn:
                if item not in uniques:
                    uniques.append(item)
            add1 = 0
            add2 = 0
        # Record where this batch stopped so the next run can resume from it.
        write_ending_loop_index = open("C:\\Users\\vchauhan\\Dropbox\\Code\\sql_var.txt", "w")
        write_ending_loop_index.write(str(j))
        write_ending_loop_index.close()
        cnxn.commit()
    return ()
########################################################################
The problem with this code is that it does not write anything back into the table tblauto_tagged, yet the Python code does not raise any error. I am not very experienced with programming or databases; a small diagnostic check is sketched below.
########################################################################
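As a first step, something like the following can show whether the UPDATE reaches the table at all. This is only a sketch, not part of the script above: it reuses the same connection string, assumes the same table and column names, and test_srno is just a placeholder for a propernoun_SRNO value known to exist in tblauto_tagged.

import pyodbc

cnxn = pyodbc.connect('Driver={MySQL ODBC 5.1 Driver};Server=127.0.0.1;Port=3306;Database=information_schema;User=root; Password=1234;Option=3;')
cursor = cnxn.cursor()
cursor.execute("use collegedatabase;")

test_srno = 1  # placeholder key value

# 1. Does the WHERE clause match any row at all?
cursor.execute("select count(*) from tblauto_tagged where propernoun_SRNO = ?", test_srno)
print("matching rows: %d" % cursor.fetchone()[0])

# 2. Does a single UPDATE report any affected rows?
cursor.execute("update tblauto_tagged set tagger_train = ? where propernoun_SRNO = ?", "test", test_srno)
print("rows updated: %d" % cursor.rowcount)

# 3. Commit explicitly; pyodbc keeps autocommit off by default, so an
#    uncommitted update is rolled back when the connection closes.
cnxn.commit()

If step 1 returns 0, the propernoun_SRNO values in tblauto_tagged simply do not match the range of j that the batch loop generates, which would explain updates that appear to succeed without changing anything and without raising an error.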