From 045bb9edc5f8b4a1c130811cc39f19417904beed Mon Sep 17 00:00:00 2001
From: Cédric Bonhomme <cedric@cedricbonhomme.org>
Date: Wed, 18 Feb 2015 21:39:28 +0100
Subject: Alembic is magic!

---
 alembic/README                                     |  1 +
 alembic/alembic.ini                                | 68 ++++++++++++++++++++
 alembic/env.py                                     | 72 ++++++++++++++++++++++
 alembic/script.py.mako                             | 24 ++++++++
 .../versions/48f561c0ce6_add_column_entry_id.py    | 24 ++++++++
 pyaggr3g470r/crawler.py                            |  9 ++-
 pyaggr3g470r/models.py                             |  1 +
 7 files changed, 197 insertions(+), 2 deletions(-)
 create mode 100644 alembic/README
 create mode 100644 alembic/alembic.ini
 create mode 100644 alembic/env.py
 create mode 100644 alembic/script.py.mako
 create mode 100644 alembic/versions/48f561c0ce6_add_column_entry_id.py

diff --git a/alembic/README b/alembic/README
new file mode 100644
index 00000000..98e4f9c4
--- /dev/null
+++ b/alembic/README
@@ -0,0 +1 @@
+Generic single-database configuration.
\ No newline at end of file
diff --git a/alembic/alembic.ini b/alembic/alembic.ini
new file mode 100644
index 00000000..b0341552
--- /dev/null
+++ b/alembic/alembic.ini
@@ -0,0 +1,68 @@
+# A generic, single database configuration.
+
+[alembic]
+# path to migration scripts
+script_location = .
+
+# template used to generate migration files
+# file_template = %%(rev)s_%%(slug)s
+
+# max length of characters to apply to the
+# "slug" field
+#truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+# revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+# sourceless = false
+
+# version location specification; this defaults
+# to alembic/versions.  When using multiple version
+# directories, initial revisions must be specified with --version-path
+# version_locations = %(here)s/bar %(here)s/bat alembic/versions
+
+# the output encoding used when revision files
+# are written from script.py.mako
+# output_encoding = utf-8
+
+sqlalchemy.url = driver://user:pass@localhost/dbname
+
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/alembic/env.py b/alembic/env.py
new file mode 100644
index 00000000..fccd445a
--- /dev/null
+++ b/alembic/env.py
@@ -0,0 +1,72 @@
+from __future__ import with_statement
+from alembic import context
+from sqlalchemy import engine_from_config, pool
+from logging.config import fileConfig
+
+# this is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Interpret the config file for Python logging.
+# This call sets up the loggers defined in that file.
+fileConfig(config.config_file_name)
+
+# add your model's MetaData object here
+# for 'autogenerate' support
+# from myapp import mymodel
+# target_metadata = mymodel.Base.metadata
+target_metadata = None
+
+# other values from the config, defined by the needs of env.py,
+# can be acquired:
+# my_important_option = config.get_main_option("my_important_option")
+# ... etc.
+
+
+def run_migrations_offline():
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL
+    and not an Engine, though an Engine is acceptable
+    here as well.  By skipping the Engine creation
+    we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    """
+    url = config.get_main_option("sqlalchemy.url")  # sqlalchemy.url value from the .ini file
+    context.configure(url=url, target_metadata=target_metadata)  # URL only: no Engine is built
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online():
+    """Run migrations in 'online' mode.
+
+    In this scenario we need to create an Engine
+    and associate a connection with the context.
+
+    """
+    engine = engine_from_config(
+        config.get_section(config.config_ini_section),
+        prefix='sqlalchemy.',
+        poolclass=pool.NullPool)  # NullPool: connections are not pooled
+
+    connection = engine.connect()
+    try:  # starts right after connect() so a failing configure() still closes it
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+    finally:
+        connection.close()
+
+if context.is_offline_mode():  # module-level dispatch: runs when this script is executed
+    run_migrations_offline()
+else:
+    run_migrations_online()
diff --git a/alembic/script.py.mako b/alembic/script.py.mako
new file mode 100644
index 00000000..43c09401
--- /dev/null
+++ b/alembic/script.py.mako
@@ -0,0 +1,24 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+def upgrade():
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade():
+    ${downgrades if downgrades else "pass"}
diff --git a/alembic/versions/48f561c0ce6_add_column_entry_id.py b/alembic/versions/48f561c0ce6_add_column_entry_id.py
new file mode 100644
index 00000000..3f52a7a9
--- /dev/null
+++ b/alembic/versions/48f561c0ce6_add_column_entry_id.py
@@ -0,0 +1,24 @@
+"""add column entry_id
+
+Revision ID: 48f561c0ce6
+Revises: 
+Create Date: 2015-02-18 21:17:19.346998
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '48f561c0ce6'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade():  # add the nullable 'entry_id' string column to the 'article' table
+    op.add_column('article', sa.Column('entry_id', sa.String(), nullable=True))
+
+
+def downgrade():  # revert upgrade(): drop 'entry_id' from the 'article' table
+    op.drop_column('article', 'entry_id')
diff --git a/pyaggr3g470r/crawler.py b/pyaggr3g470r/crawler.py
index f2ae1599..1cfe5827 100644
--- a/pyaggr3g470r/crawler.py
+++ b/pyaggr3g470r/crawler.py
@@ -113,6 +113,11 @@ def parse_feed(user, feed):
         # remove utm_* parameters
         nice_url = utils.clean_url(nice_url)
 
+        try:
+            entry_id = article.id
+        except (AttributeError, KeyError):  # entry carries no id: fall back to its cleaned URL
+            entry_id = nice_url
+
         description = ""
         article_title = article.get('title', '')
         try:
@@ -151,7 +156,7 @@ def parse_feed(user, feed):
             post_date = datetime.now(dateutil.tz.tzlocal())
 
         # create the models.Article object and append it to the list of articles
-        article = Article(link=nice_url, title=article_title,
+        article = Article(entry_id=entry_id, link=nice_url, title=article_title,
                         content=description, readed=False, like=False,
                         date=post_date, user_id=user.id,
                         feed_id=feed.id)
@@ -172,7 +177,7 @@ def insert_database(user, feed):
     query1 = Article.query.filter(Article.user_id == user.id)
     query2 = query1.filter(Article.feed_id == feed.id)
     for article in articles:
-        exist = query2.filter(Article.link == article.link).count() != 0
+        exist = query2.filter(Article.entry_id == article.entry_id).count() != 0
         if exist:
             #logger.debug("Article %r (%r) already in the database.", article.title, article.link)
             continue
diff --git a/pyaggr3g470r/models.py b/pyaggr3g470r/models.py
index 21fedde4..b7a75d5f 100644
--- a/pyaggr3g470r/models.py
+++ b/pyaggr3g470r/models.py
@@ -121,6 +121,7 @@ class Article(db.Model):
     Represent an article from a feed.
     """
     id = db.Column(db.Integer, primary_key = True)
+    entry_id = db.Column(db.String())
     link = db.Column(db.String())
     title = db.Column(db.String())
     content = db.Column(db.String())
-- 
cgit