Python: I need help inserting items from Scrapy into MySQL.
This is my error, Traceback:
2013-09-13 13:08:18-0700 [scrapy] Unhandled Error
Traceback (most recent call last):
File "C:\python27\lib\threading.py", line 781, in __bootstrap
self.__bootstrap_inner()
File "C:\python27\lib\threading.py", line 808, in __bootstrap_inner
self.run()
File "C:\python27\lib\threading.py", line 761, in run
self.__target(*self.__args, **self.__kwargs)
--- <exception caught here> ---
File "C:\python27\lib\site-packages\twisted\python\threadpool.py", lin
e 191, in _worker
result = context.call(ctx, function, *args, **kwargs)
File "C:\python27\lib\site-packages\twisted\python\context.py", line 1
18, in callWithContext
return self.currentContext().callWithContext(ctx, func, *args, **kw)
File "C:\python27\lib\site-packages\twisted\python\context.py", line 8
1, in callWithContext
return func(*args,**kw)
File "C:\python27\lib\site-packages\twisted\enterprise\adbapi.py", lin
e 448, in _runInteraction
result = interaction(trans, *args, **kw)
File "apple\pipelines.py", line 44, in _conditional_insert
tx.execute('select * from `job` where day = %s' % item['day'])
File "C:\python27\lib\site-packages\MySQLdb\cursors.py", line 202, in
execute
self.errorhandler(self, exc, value)
File "C:\python27\lib\site-packages\MySQLdb\connections.py", line 36,
in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQ
L syntax; check the manual that corresponds to your MySQL server version for the
right syntax to use near '[u'September 13']' at line 1")
2013-09-13 13:08:18-0700 [apple] INFO: Closing spider (finished)
2013-09-13 13:08:18-0700 [apple] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 223,
'downloader/request_count': 1,
'downloader/request_method_count/GET': 1,
'downloader/response_bytes': 14674,
'downloader/response_count': 1,
'downloader/response_status_count/200': 1,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2013, 9, 13, 20, 8, 18, 579000),
'item_scraped_count': 434,
'log_count/DEBUG': 441,
'log_count/ERROR': 434,
'log_count/INFO': 4,
'response_received_count': 1,
'scheduler/dequeued': 1,
'scheduler/dequeued/memory': 1,
'scheduler/enqueued': 1,
'scheduler/enqueued/memory': 1,
'start_time': datetime.datetime(2013, 9, 13, 20, 6, 57, 904000)}
2013-09-13 13:08:18-0700 [apple] INFO: Spider closed (finished)
This is my pipelines.py
from scrapy.exceptions import DropItem
from scrapy.exceptions import DropItem
from scrapy import log
from twisted.enterprise import adbapi
import MySQLdb.cursors
class ApplePipeline(object):
    """Drop scraped items whose 'team1' field contains a forbidden word.

    NOTE(review): ``self.words_to_filter`` is never initialized in this
    file -- confirm it is set elsewhere (e.g. from spider settings),
    otherwise process_item raises AttributeError on first use.
    """

    def process_item(self, item, spider):
        """Return the item, or raise DropItem if any forbidden word matches.

        Bug fix: the original had ``else: return item`` attached to the
        ``if`` inside the loop, so the item was returned as soon as the
        FIRST word failed to match -- the remaining filter words were
        never checked. The return now happens only after ALL words have
        been tested.
        """
        team1 = unicode(item['team1']).lower()
        for word in self.words_to_filter:
            if word in team1:
                raise DropItem("Contains forbidden word: %s" % word)
        return item
class MySQLStorePipeline(object):
    """Store scraped items in MySQL, skipping days already present.

    Uses Twisted's adbapi thread pool so DB work never blocks the
    reactor; errors from the deferred are routed to handle_error.
    """

    def __init__(self):
        # @@@ hardcoded db settings
        # TODO: make settings configurable through settings
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb',
            host='127.0.0.1',
            db='mydb',
            user='myuser',
            passwd='mypass',
            cursorclass=MySQLdb.cursors.DictCursor,
            charset='utf8',
            use_unicode=True,
        )

    def process_item(self, item, spider):
        # Run the db query in the adbapi thread pool and return the
        # item immediately; failures are logged via the errback.
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        """Insert the item unless a row with the same ``day`` exists.

        Runs in a worker thread via runInteraction; ``tx`` is a
        transaction-wrapped DB-API cursor.

        Bug fix: the original SELECT was built with %-string
        interpolation, which produced the 1064 syntax error seen in the
        traceback (item['day'] is a list, rendered literally as
        "[u'September 13']") and was an SQL-injection hole. Using a
        parameterized placeholder -- as the INSERT below already does --
        lets the driver quote the value correctly.

        NOTE(review): Scrapy field values are often one-element lists;
        if item['day'] arrives as a list, the spider/loader should
        flatten it (e.g. item['day'][0]) before this pipeline -- confirm
        against the spider.
        """
        # aici extrage prima echipa  (here it extracts the first team)
        tx.execute('select * from `scoruri` where day = %s', (item['day'],))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute(
                "INSERT INTO `scoruri` (competition, day, hour, score, team1, team2) "
                "values (%s, %s, %s, %s, %s, %s)",
                (item['competition'],
                 item['day'],
                 item['hour'],
                 item['score'],
                 item['team1'],
                 item['team2'],
                 )
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        # Log deferred DB failures instead of letting them become
        # "Unhandled Error" noise in the Scrapy log.
        log.err(e)
Thanks in advance for any help!
This is my error, Traceback:
2013-09-13 13:08:18-0700 [scrapy] Unhandled Error
Traceback (most recent call last):
File "C:\python27\lib\threading.py", line 781, in __bootstrap
self.__bootstrap_inner()
File "C:\python27\lib\threading.py", line 808, in __bootstrap_inner
self.run()
File "C:\python27\lib\threading.py", line 761, in run
self.__target(*self.__args, **self.__kwargs)
--- <exception caught here> ---
File "C:\python27\lib\site-packages\twisted\python\threadpool.py", lin
e 191, in _worker
result = context.call(ctx, function, *args, **kwargs)
File "C:\python27\lib\site-packages\twisted\python\context.py", line 1
18, in callWithContext
return self.currentContext().callWithContext(ctx, func, *args, **kw)
File "C:\python27\lib\site-packages\twisted\python\context.py", line 8
1, in callWithContext
return func(*args,**kw)
File "C:\python27\lib\site-packages\twisted\enterprise\adbapi.py", lin
e 448, in _runInteraction
result = interaction(trans, *args, **kw)
File "apple\pipelines.py", line 44, in _conditional_insert
tx.execute('select * from `job` where day = %s' % item['day'])
File "C:\python27\lib\site-packages\MySQLdb\cursors.py", line 202, in
execute
self.errorhandler(self, exc, value)
File "C:\python27\lib\site-packages\MySQLdb\connections.py", line 36,
in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.ProgrammingError: (1064, "You have an error in your SQ
L syntax; check the manual that corresponds to your MySQL server version for the
right syntax to use near '[u'September 13']' at line 1")
2013-09-13 13:08:18-0700 [apple] INFO: Closing spider (finished)
2013-09-13 13:08:18-0700 [apple] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 223,
'downloader/request_count': 1,
'downloader/request_method_count/GET': 1,
'downloader/response_bytes': 14674,
'downloader/response_count': 1,
'downloader/response_status_count/200': 1,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2013, 9, 13, 20, 8, 18, 579000),
'item_scraped_count': 434,
'log_count/DEBUG': 441,
'log_count/ERROR': 434,
'log_count/INFO': 4,
'response_received_count': 1,
'scheduler/dequeued': 1,
'scheduler/dequeued/memory': 1,
'scheduler/enqueued': 1,
'scheduler/enqueued/memory': 1,
'start_time': datetime.datetime(2013, 9, 13, 20, 6, 57, 904000)}
2013-09-13 13:08:18-0700 [apple] INFO: Spider closed (finished)
This is my pipelines.py
from scrapy.exceptions import DropItem
from scrapy.exceptions import DropItem
from scrapy import log
from twisted.enterprise import adbapi
import MySQLdb.cursors
class ApplePipeline(object):
    """Drop scraped items whose 'team1' field contains a forbidden word.

    NOTE(review): ``self.words_to_filter`` is never initialized in this
    file -- confirm it is set elsewhere (e.g. from spider settings),
    otherwise process_item raises AttributeError on first use.
    """

    def process_item(self, item, spider):
        """Return the item, or raise DropItem if any forbidden word matches.

        Bug fix: the original had ``else: return item`` attached to the
        ``if`` inside the loop, so the item was returned as soon as the
        FIRST word failed to match -- the remaining filter words were
        never checked. The return now happens only after ALL words have
        been tested.
        """
        team1 = unicode(item['team1']).lower()
        for word in self.words_to_filter:
            if word in team1:
                raise DropItem("Contains forbidden word: %s" % word)
        return item
class MySQLStorePipeline(object):
    """Store scraped items in MySQL, skipping days already present.

    Uses Twisted's adbapi thread pool so DB work never blocks the
    reactor; errors from the deferred are routed to handle_error.
    """

    def __init__(self):
        # @@@ hardcoded db settings
        # TODO: make settings configurable through settings
        self.dbpool = adbapi.ConnectionPool(
            'MySQLdb',
            host='127.0.0.1',
            db='mydb',
            user='myuser',
            passwd='mypass',
            cursorclass=MySQLdb.cursors.DictCursor,
            charset='utf8',
            use_unicode=True,
        )

    def process_item(self, item, spider):
        # Run the db query in the adbapi thread pool and return the
        # item immediately; failures are logged via the errback.
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        """Insert the item unless a row with the same ``day`` exists.

        Runs in a worker thread via runInteraction; ``tx`` is a
        transaction-wrapped DB-API cursor.

        Bug fix: the original SELECT was built with %-string
        interpolation, which produced the 1064 syntax error seen in the
        traceback (item['day'] is a list, rendered literally as
        "[u'September 13']") and was an SQL-injection hole. Using a
        parameterized placeholder -- as the INSERT below already does --
        lets the driver quote the value correctly.

        NOTE(review): Scrapy field values are often one-element lists;
        if item['day'] arrives as a list, the spider/loader should
        flatten it (e.g. item['day'][0]) before this pipeline -- confirm
        against the spider.
        """
        # aici extrage prima echipa  (here it extracts the first team)
        tx.execute('select * from `scoruri` where day = %s', (item['day'],))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute(
                "INSERT INTO `scoruri` (competition, day, hour, score, team1, team2) "
                "values (%s, %s, %s, %s, %s, %s)",
                (item['competition'],
                 item['day'],
                 item['hour'],
                 item['score'],
                 item['team1'],
                 item['team2'],
                 )
            )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        # Log deferred DB failures instead of letting them become
        # "Unhandled Error" noise in the Scrapy log.
        log.err(e)
Thanks in advance for any help!