bdappv/export-data.py at main · gabrielkasmi/bdappv · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
# This script extract input data from SQL db and dumps it into JSON
#

import os

import pymysql
from dotenv import load_dotenv

from lib.model import *
import argparse

# For surface, image was displayed on 500 px instead of 400
from lib.utils import Campaign

SURFACE_RATIO = 0.8

CLICK_SRC_IMAGES = {
   Campaign.GOOGLE : 1,
   Campaign.IGN:2
}

POLYGON_SRC_IMAGES = {
   Campaign.GOOGLE : 4,
   Campaign.IGN:8, # TODO: to be defined, not sure
}

SQL_CLICK="""
   SELECT
      action.coordX as x,
      action.coordY as y,
      action.pv as isPV,
      bosseurAction.idImage as imageId,
      bosseurAction.dateAction as actionDate,
      bosseur.idBosseur as actorId,
      bosseur.region as actorRegion,
      bosseur.pays as actorCountry
   FROM  prd_bdappv_actionPV as action
   LEFT JOIN prd_bdappv_bosseurAction as bosseurAction ON action.idActionPV = bosseurAction.idAction
   LEFT JOIN prd_bdappv_bosseur as bosseur ON bosseurAction.idBosseur = bosseur.idBosseur
   WHERE bosseurAction.sourceImg = {SourceImg}
"""

SQL_POLYGON="""
   SELECT
      point.coordX as x,
      point.coordY as y,
      point.idSurface as polygonId,
      bosseurAction.idImage as imageId,
      bosseurAction.dateAction as actionDate,
      bosseur.idBosseur as actorId,
      bosseur.region as actorRegion,
      bosseur.pays as actorCountry
   FROM prd_bdappv_zoneSurfacePoint as point
   LEFT JOIN prd_bdappv_zoneSurface as surface ON point.idSurface = surface.idSurface
   LEFT JOIN prd_bdappv_bosseurAction as bosseurAction ON surface.idActionSurface = bosseurAction.idAction
   LEFT JOIN prd_bdappv_bosseur as bosseur ON bosseurAction.idBosseur = bosseur.idBosseur
   WHERE bosseurAction.sourceImg = {SourceImg}
   ORDER BY point.idSurfacePoint
"""

SQL_IMG="""
   SELECT
         image.idImage as id,
         image.identifiant as img_id,
         install.ville as city,
         install.id_utilisateur as install_id,
         dep.region as region,
         dep.nom as department
   FROM prd_bdappv_image as image
   LEFT JOIN pv_installation as install ON image.idInstallation = install.id_utilisateur
   LEFT JOIN pv_departement as dep ON dep.numero = install.departement"""


def load_imgs(campaign, filter):
   """Load image data from DB"""
   imgs = dict()

   conn = pymysql.connect(
      host=os.environ["DB_HOST"],
      port=int(os.environ["DB_PORT"]),
      user=os.environ["DB_USER"],
      password=os.environ["DB_PASS"],
      database=os.environ["DB_NAME"],
      cursorclass=pymysql.cursors.DictCursor)

   with conn :
      # Fetch all images
      with conn.cursor() as cursor :
         cursor.execute(SQL_IMG)
         for row in cursor :
            img = Image(row["img_id"], row["city"], row["department"], row["region"], row["install_id"])
            imgs[row["id"]] = img

      # Fetch clicks
      with conn.cursor() as cursor:
         cursor.execute(SQL_CLICK.format(SourceImg=CLICK_SRC_IMAGES[campaign]))
         for row in cursor:
            action = Action(row["actorCountry"], row["actorRegion"], row["actionDate"], row["actorId"])
            imageId = row["imageId"]

            if imageId is None :
               print("Bad action : ", row)
               continue

            if row["isPV"] :
               click = Click(row["x"], row["y"], action)
               imgs[imageId].clicks.append(click)
            else :
               imgs[imageId].notPvActions.append(action)

      # Fetch polygons
      with conn.cursor() as cursor:
         cursor.execute(SQL_POLYGON.format(SourceImg=POLYGON_SRC_IMAGES[campaign]))

         # Dict of polygonId => (polygon)
         polygons = dict()
         for row in cursor:
            action = Action(row["actorCountry"], row["actorRegion"], row["actionDate"], row["actorId"])
            imageId = row["imageId"]

            polygonId = row["polygonId"]
            if polygonId in polygons :
               polygon = polygons[polygonId]
            else:
               # Ne polygon ? Create it and add it to image
               polygon = Polygon(action)
               polygons[polygonId] = polygon
               imgs[imageId].polygons.append(polygon)

            point = Point(
               int(row["x"] * SURFACE_RATIO),
               int(row["y"] * SURFACE_RATIO))
            polygon.points.append(point)

   # Filter images having at least one click
   if filter :
      res = list(img for img in imgs.values() if ((len(img.clicks) > 0) or (len(img.polygons) > 0)))
   else:
      res = list(imgs.values())

   return res


if __name__ == '__main__':

   parser = argparse.ArgumentParser()
   parser.add_argument('output_file', type=str, metavar="out.json", help="Output file")
   parser.add_argument('--campaign', '-c', type=str, choices=[Campaign.IGN, Campaign.GOOGLE], help="Campaign : either 'google' (default) or 'ign'", default=Campaign.GOOGLE)
   parser.add_argument('--filter', '-f', action="store_true",
                       help="Filter out images with no click nor polygon", default=False)
   args = parser.parse_args()

   load_dotenv()
   imgs = load_imgs(args.campaign, args.filter)

   print("Found %d images" % len(imgs))

   to_json(imgs, args.output_file)