From 045bb9edc5f8b4a1c130811cc39f19417904beed Mon Sep 17 00:00:00 2001 From: Cédric Bonhomme Date: Wed, 18 Feb 2015 21:39:28 +0100 Subject: Alembic is magic! --- alembic/README | 1 + alembic/alembic.ini | 68 ++++++++++++++++++++ alembic/env.py | 72 ++++++++++++++++++++++ alembic/script.py.mako | 24 ++++++++ .../versions/48f561c0ce6_add_column_entry_id.py | 24 ++++++++ pyaggr3g470r/crawler.py | 9 ++- pyaggr3g470r/models.py | 1 + 7 files changed, 197 insertions(+), 2 deletions(-) create mode 100644 alembic/README create mode 100644 alembic/alembic.ini create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 alembic/versions/48f561c0ce6_add_column_entry_id.py diff --git a/alembic/README b/alembic/README new file mode 100644 index 00000000..98e4f9c4 --- /dev/null +++ b/alembic/README @@ -0,0 +1 @@ +Generic single-database configuration. \ No newline at end of file diff --git a/alembic/alembic.ini b/alembic/alembic.ini new file mode 100644 index 00000000..b0341552 --- /dev/null +++ b/alembic/alembic.ini @@ -0,0 +1,68 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = . + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 00000000..fccd445a --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,72 @@ +from __future__ import with_statement +from alembic import context +from sqlalchemy import engine_from_config, pool +from logging.config import fileConfig + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure(url=url, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + engine = engine_from_config( + config.get_section(config.config_ini_section), + prefix='sqlalchemy.', + poolclass=pool.NullPool) + + connection = engine.connect() + context.configure( + connection=connection, + target_metadata=target_metadata + ) + + try: + with context.begin_transaction(): + context.run_migrations() + finally: + connection.close() + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 00000000..43c09401 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/48f561c0ce6_add_column_entry_id.py b/alembic/versions/48f561c0ce6_add_column_entry_id.py new file mode 100644 index 00000000..3f52a7a9 --- /dev/null +++ b/alembic/versions/48f561c0ce6_add_column_entry_id.py @@ -0,0 +1,24 @@ +"""add column entry_id + +Revision ID: 48f561c0ce6 +Revises: +Create Date: 2015-02-18 21:17:19.346998 + +""" + +# revision identifiers, used by Alembic. +revision = '48f561c0ce6' +down_revision = None +branch_labels = None +depends_on = None + +from alembic import op +import sqlalchemy as sa + + +def upgrade(): + op.add_column('article', sa.Column('entry_id', sa.String(), nullable=True)) + + +def downgrade(): + op.drop_column('article', 'entry_id') diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py index f2ae1599..1cfe5827 100644 --- a/pyaggr3g470r/crawler.py +++ b/pyaggr3g470r/crawler.py @@ -113,6 +113,11 @@ def parse_feed(user, feed): # remove utm_* parameters nice_url = utils.clean_url(nice_url) + try: + entry_id = article.id + except: + entry_id = nice_url + description = "" article_title = article.get('title', '') try: @@ -151,7 +156,7 @@ def parse_feed(user, feed): post_date = datetime.now(dateutil.tz.tzlocal()) # create the models.Article object and append it to the list of articles - article = Article(link=nice_url, title=article_title, + article = Article(entry_id=entry_id, link=nice_url, title=article_title, content=description, readed=False, like=False, date=post_date, user_id=user.id, feed_id=feed.id) @@ -172,7 +177,7 @@ def insert_database(user, feed): query1 = Article.query.filter(Article.user_id == user.id) query2 = query1.filter(Article.feed_id == feed.id) for article in articles: - exist = query2.filter(Article.link == article.link).count() != 0 + exist = query2.filter(Article.entry_id == article.entry_id).count() != 0 if exist: #logger.debug("Article %r (%r) already in the database.", article.title, article.link) continue diff --git a/pyaggr3g470r/models.py b/pyaggr3g470r/models.py index 21fedde4..b7a75d5f 100644 --- a/pyaggr3g470r/models.py +++ b/pyaggr3g470r/models.py @@ -121,6 +121,7 @@ class Article(db.Model): Represent an article from a feed. """ id = db.Column(db.Integer, primary_key = True) + entry_id = db.Column(db.String()) link = db.Column(db.String()) title = db.Column(db.String()) content = db.Column(db.String()) -- cgit