Skip to content
This repository was archived by the owner on Jan 22, 2026. It is now read-only.

Commit a38e07e

Browse files
authored
Add support for python3 (#208)
and make python3 default in docker
1 parent 51edf77 commit a38e07e

23 files changed

Lines changed: 202 additions & 154 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
.pytest_cache/
1515
python_moztelemetry.egg-info/
1616
docs/_build/
17+
coverage.xml

.travis.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
1+
env:
2+
- PYTHON_VERSION=2.7
3+
- PYTHON_VERSION=3.5
4+
- PYTHON_VERSION=3.6
15
sudo: required
26
language: python
37
services:
48
- docker
59
before_install:
610
- env | grep TRAVIS > .travis-env
7-
- docker build -t moztelemetry_docker .
11+
- docker build -t moztelemetry_docker:python-$PYTHON_VERSION --build-arg PYTHON_VERSION=$PYTHON_VERSION .
812
script:
9-
- docker run --env-file .travis-env moztelemetry_docker ./runtests.sh -v --timeout=120
13+
# mount coverage.xml as a file into the container to retrieve coverage report
14+
- touch coverage.xml
15+
- docker run --env-file .travis-env -v $PWD/coverage.xml:/python_moztelemetry/coverage.xml moztelemetry_docker:python-$PYTHON_VERSION ./runtests.sh -v --timeout=120 tests
16+
after_success:
17+
- bash <(curl -s https://codecov.io/bash) -F python${PYTHON_VERSION//.}
1018
deploy:
1119
provider: pypi
1220
user: fbertsch

Dockerfile

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,8 @@
11
FROM openjdk:8
22

3-
ENV SPARK_VERSION=2.0.2
4-
53
# install gcc
64
RUN apt-get update --fix-missing && \
7-
apt-get install -y \
8-
g++ libpython-dev libsnappy-dev \
9-
build-essential libssl-dev libffi-dev git
5+
apt-get install -y g++
106

117
# setup conda environment
128
# temporary workaround, pin miniconda version until it's fixed.
@@ -25,7 +21,9 @@ RUN hash -r && \
2521

2622
# build + activate conda environment
2723
COPY ./environment.yml /python_moztelemetry/
28-
RUN conda env create -f /python_moztelemetry/environment.yml
24+
ARG PYTHON_VERSION=2.7
25+
RUN echo "- python=$PYTHON_VERSION" >> /python_moztelemetry/environment.yml && \
26+
conda env create -f /python_moztelemetry/environment.yml
2927

3028
# this is roughly equivalent to activating the conda environment
3129
ENV PATH="/miniconda/envs/test-environment/bin:${PATH}"
@@ -40,4 +38,4 @@ RUN pip install 'pytest>=3'
4038
COPY . /python_moztelemetry
4139

4240
# install moztelemetry specific deps into conda env
43-
RUN pip install /python_moztelemetry/ --process-dependency-links
41+
RUN pip install .[test]

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
# python_moztelemetry [![Build Status](https://travis-ci.org/mozilla/python_moztelemetry.svg?branch=master)](https://travis-ci.org/mozilla/python_moztelemetry) [![Documentation Status](http://readthedocs.org/projects/python_moztelemetry/badge/?version=latest)](https://python_moztelemetry.readthedocs.io/?badge=latest) [![Updates](https://pyup.io/repos/github/mozilla/python_moztelemetry/shield.svg)](https://pyup.io/repos/github/mozilla/python_moztelemetry/)
2+
# python_moztelemetry [![Build Status](https://travis-ci.org/mozilla/python_moztelemetry.svg?branch=master)](https://travis-ci.org/mozilla/python_moztelemetry) [![Documentation Status](http://readthedocs.org/projects/python_moztelemetry/badge/?version=latest)](https://python_moztelemetry.readthedocs.io/?badge=latest) [![Updates](https://pyup.io/repos/github/mozilla/python_moztelemetry/shield.svg)](https://pyup.io/repos/github/mozilla/python_moztelemetry/) [![codecov.io](https://codecov.io/github/mozilla/python_moztelemetry/coverage.svg?branch=master)](https://codecov.io/github/mozilla/python_moztelemetry?branch=master)
33

44
Spark bindings for Mozilla Telemetry
55

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
name: test-environment
22
dependencies:
3-
- python=2.7
43
- pandas
54
- pyspark
65
- python-snappy
76
- snappy
87
- scipy
8+
- gcc

moztelemetry/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
# flake8: noqa
2-
from spark import *
3-
from histogram import *
2+
from .spark import *
3+
from .histogram import *

moztelemetry/dataset.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# License, v. 2.0. If a copy of the MPL was not distributed with this
33
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
44
from __future__ import division, print_function
5-
import copy_reg
65
import functools
76
import json
87
import random
@@ -12,6 +11,7 @@
1211
from inspect import isfunction
1312
from itertools import chain
1413
from multiprocessing import cpu_count
14+
from six.moves import copyreg
1515

1616
import jmespath
1717
from concurrent import futures
@@ -52,7 +52,7 @@ def _pickle_method(m):
5252
return getattr, (m.im_self, m.im_func.func_name)
5353

5454

55-
copy_reg.pickle(types.MethodType, _pickle_method)
55+
copyreg.pickle(types.MethodType, _pickle_method)
5656

5757

5858
class Dataset:
@@ -126,14 +126,15 @@ def select(self, *properties, **aliased_properties):
126126
"""
127127
if not (properties or aliased_properties):
128128
return self
129-
properties_pairs = zip(properties, properties)
130-
merged_properties = dict(properties_pairs + aliased_properties.items())
129+
merged_properties = dict(zip(properties, properties))
130+
merged_properties.update(aliased_properties)
131131

132132
for prop_name in (merged_properties.keys()):
133133
if prop_name in self.selection:
134134
raise Exception('The property {} has already been selected'.format(prop_name))
135135

136-
new_selection = dict(self.selection.items() + merged_properties.items())
136+
new_selection = self.selection.copy()
137+
new_selection.update(merged_properties)
137138

138139
return Dataset(self.bucket, self.schema, store=self.store, prefix=self.prefix,
139140
clauses=self.clauses, selection=new_selection)
@@ -270,7 +271,7 @@ def records(self, sc, limit=None, sample=1, seed=42, decode=None, summaries=None
270271
random.setstate(seed_state)
271272

272273
# Obtain size in MB
273-
total_size = reduce(lambda acc, item: acc + item['size'], summaries, 0)
274+
total_size = functools.reduce(lambda acc, item: acc + item['size'], summaries, 0)
274275
total_size_mb = total_size / float(1 << 20)
275276
print("fetching %.5fMB in %s files..." % (total_size_mb, len(summaries)))
276277

@@ -305,10 +306,10 @@ def from_source(source_name):
305306
store = S3Store(meta_bucket)
306307

307308
try:
308-
source = json.loads(store.get_key('sources.json').read())[source_name]
309+
source = json.loads(store.get_key('sources.json').read().decode('utf-8'))[source_name]
309310
except KeyError:
310311
raise Exception('Unknown source {}'.format(source_name))
311312

312-
schema = store.get_key('{}/schema.json'.format(source['metadata_prefix'])).read()
313+
schema = store.get_key('{}/schema.json'.format(source['metadata_prefix'])).read().decode('utf-8')
313314
dimensions = [f['field_name'] for f in json.loads(schema)['dimensions']]
314315
return Dataset(source['bucket'], dimensions, prefix=source['prefix'])

moztelemetry/heka/message_parser.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55
import ssl
66
import struct
77

8-
import boto
8+
import boto.s3.key
99
import snappy
1010
import ujson as json
1111
import json as standard_json
1212
import zlib
13-
from cStringIO import StringIO
13+
from io import BytesIO
1414
from google.protobuf.message import DecodeError
1515

1616
from .message_pb2 import Message, Header
@@ -42,17 +42,18 @@ def _parse_heka_record(record):
4242
# messages is an unprocessed form of the data, usually the original
4343
# gzipped payload from the client.
4444
#
45-
# We attempt to decompress it, and if that fails,
46-
# attempt to decode it as a UTF-8 string.
45+
# We decompress it if we can, then try to decode it as a UTF-8 string.
4746
elif field.name == 'content':
4847
try:
49-
string = zlib.decompress(field.value_bytes[0], 16+zlib.MAX_WBITS)
50-
except zlib.error:
48+
string = field.value_bytes[0]
5149
try:
52-
string = field.value_bytes[0].decode('utf-8')
53-
except UnicodeDecodeError:
54-
# There is no associated payload
55-
break
50+
string = zlib.decompress(string, 16+zlib.MAX_WBITS)
51+
except zlib.error:
52+
pass # not compressed
53+
string = string.decode('utf-8')
54+
except UnicodeDecodeError:
55+
# There is no associated payload
56+
break
5657
payload = {"content": string}
5758
break
5859

@@ -123,7 +124,7 @@ def _parse_json(string):
123124
class BacktrackableFile:
124125
def __init__(self, stream):
125126
self._stream = stream
126-
self._buffer = StringIO()
127+
self._buffer = BytesIO()
127128

128129
def read(self, size):
129130
buffer_data = self._buffer.read(size)
@@ -151,7 +152,7 @@ def backtrack(self):
151152
buf = self._buffer.getvalue()
152153
index = buf.find(chr(_record_separator), 1)
153154

154-
self._buffer = StringIO()
155+
self._buffer = BytesIO()
155156
if index >= 0:
156157
self._buffer.write(buf[index:])
157158
self._buffer.seek(0)
@@ -170,7 +171,7 @@ def read_until_next(fin, separator=_record_separator):
170171
bytes_skipped = 0
171172
while True:
172173
c = fin.read(1)
173-
if c == '':
174+
if len(c) == 0:
174175
return bytes_skipped, True
175176
elif ord(c) != separator:
176177
bytes_skipped += 1
@@ -196,7 +197,7 @@ def read_one_record(input_stream, raw=False, verbose=False, strict=False, try_sn
196197
if strict:
197198
raise ValueError("Unexpected character(s) at the start of record")
198199
if verbose:
199-
print "Skipped", skipped, "bytes to find a valid separator"
200+
print("Skipped %s bytes to find a valid separator" % skipped)
200201

201202
raw_record = struct.pack("<B", 0x1e)
202203

@@ -223,9 +224,9 @@ def read_one_record(input_stream, raw=False, verbose=False, strict=False, try_sn
223224
header.ParseFromString(header_raw)
224225
unit_separator = input_stream.read(1)
225226
total_bytes += 1
226-
if ord(unit_separator[0]) != 0x1f:
227+
if ord(unit_separator) != 0x1f:
227228
error_msg = "Unexpected unit separator character at offset {}: {}".format(
228-
total_bytes, ord(unit_separator[0])
229+
total_bytes, ord(unit_separator)
229230
)
230231
if strict:
231232
raise ValueError(error_msg)
@@ -262,23 +263,23 @@ def unpack_file(filename, **kwargs):
262263

263264

264265
def unpack_string(string, **kwargs):
265-
return unpack(StringIO(string), **kwargs)
266+
return unpack(BytesIO(string), **kwargs)
266267

267268

268269
def unpack(fin, raw=False, verbose=False, strict=False, backtrack=False, try_snappy=True):
269270
record_count = 0
270271
total_bytes = 0
271272

272273
while True:
273-
r = None
274+
r, size = None, 0
274275
try:
275276
r, size = read_one_record(fin, raw, verbose, strict, try_snappy)
276277
except Exception as e:
277278
if strict:
278279
fin.close()
279280
raise e
280281
elif verbose:
281-
print e
282+
print(e)
282283

283284
if backtrack and type(e) == DecodeError:
284285
fin.backtrack()
@@ -288,14 +289,14 @@ def unpack(fin, raw=False, verbose=False, strict=False, backtrack=False, try_sna
288289
break
289290

290291
if verbose and r.error is not None:
291-
print r.error
292+
print(r.error)
292293

293294
record_count += 1
294295
total_bytes += size
295296

296297
yield r, total_bytes
297298

298299
if verbose:
299-
print "Processed", record_count, "records"
300+
print("Processed %s records" % record_count)
300301

301302
fin.close()

moztelemetry/histogram.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,20 @@
88
from __future__ import division
99

1010
import requests
11-
import parse_histograms
1211
import re
1312
import pandas as pd
1413
import numpy as np
1514
import ujson as json
1615

17-
from functools32 import lru_cache
16+
from . import parse_histograms
1817
from expiringdict import ExpiringDict
1918

19+
# python 2 and 3 compatibility
20+
try:
21+
from functools32 import lru_cache
22+
except ImportError:
23+
from functools import lru_cache
24+
2025
HISTOGRAMS_JSON_REVISION = "https://hg.mozilla.org/mozilla-central/rev/tip"
2126
HISTOGRAMS_JSON_PATH = "/toolkit/components/telemetry/Histograms.json"
2227
CATEGORICAL_HISTOGRAM_SPILL_BUCKET_NAME = 'spill'
@@ -62,9 +67,9 @@ def _fetch_histograms_definition(url):
6267
definition = requests.get(url).content
6368

6469
# see bug 920169
65-
definition = definition.replace('"JS::gcreason::NUM_TELEMETRY_REASONS"', "101")
66-
definition = definition.replace('"mozilla::StartupTimeline::MAX_EVENT_ID"', "12")
67-
definition = definition.replace('"80 + 1"', "81")
70+
definition = definition.replace(b'"JS::gcreason::NUM_TELEMETRY_REASONS"', b"101")
71+
definition = definition.replace(b'"mozilla::StartupTimeline::MAX_EVENT_ID"', b"12")
72+
definition = definition.replace(b'"80 + 1"', b"81")
6873

6974
parsed = json.loads(definition)
7075
parsed.update(histogram_exceptions)
@@ -186,7 +191,7 @@ def get_value(self, only_median=False, autocast=True):
186191
elif self.kind == "categorical" and not only_median:
187192
return self.buckets
188193
elif self.kind == "count":
189-
return long(self.buckets[0])
194+
return int(self.buckets[0])
190195
elif self.kind == "flag":
191196
return self.buckets[1] == 1
192197
else:

0 commit comments

Comments
 (0)