
Tasks analysis

Mix.install([
  {:merquery, "~> 0.3.0"},
  {:req, "~> 0.5.16"},
  {:kino, "~> 0.18.0"},
  {:vega_lite, "~> 0.1.11"},
  {:kino_vega_lite, "~> 0.1.13"},
])

Introduction

File.cwd!

Elixir and the Livebook project were chosen for building this task-analytics tool.

The VegaLite library is used for visualization.

alias VegaLite, as: Vl
IO.puts "Hello, World"

The following libraries are added to the project for making HTTP requests:

  • req — an HTTP client
  • merquery — a smart-cell wrapper around req
{:ok, %{status: st}} = Req.get("https://api.github.com/repos/elixir-lang/elixir")
st

0. Parameters shared by all requests. The Jira data-loading module

jira_url = "https://issues.apache.org/jira/rest/api/2"
kafka = "KAFKA"
headers = [{"Accept", "application/json"}]

"#{jira_url}/project/#{kafka}"
defmodule JiraFetcher do
  # Fetches a single page from the Jira search endpoint (100 issues max).
  def fetch_page(base_url, headers, jql, start_at, fields, expand \\ nil) do
    url = "#{base_url}/search"

    params = [
      jql: jql,
      startAt: start_at,
      maxResults: 100,
      fields: fields
    ]

    params = if expand, do: Keyword.put(params, :expand, expand), else: params

    case Req.get(url, headers: headers, params: params) do
      {:ok, %{status: 200, body: body}} ->
        {:ok, body}

      {:ok, response} ->
        IO.inspect(response, label: "Request failed")
        {:error, response}

      {:error, reason} ->
        IO.inspect(reason, label: "Transport error")
        {:error, reason}
    end
  end

  # Shared pagination: repeats fetch_page/6, advancing startAt by the number
  # of issues received, until the total reported by Jira is reached.
  defp fetch_all(base_url, headers, jql, fields, expand \\ nil) do
    Stream.resource(
      fn -> 0 end,
      fn start_at ->
        case fetch_page(base_url, headers, jql, start_at, fields, expand) do
          {:ok, %{"issues" => issues, "total" => total}} ->
            if issues == [] or start_at >= total do
              {:halt, start_at}
            else
              {issues, start_at + length(issues)}
            end

          _ ->
            {:halt, start_at}
        end
      end,
      fn _ -> :ok end
    )
    |> Enum.to_list()
  end

  def fetch_all_closed_issues_with_history(base_url, headers, project_key) do
    jql = "project = #{project_key} AND statusCategory = Done"
    fetch_all(base_url, headers, jql, "created,status,updated,resolutiondate", "changelog")
  end

  def fetch_all_issues(base_url, headers, project_key) do
    jql = "project = #{project_key}"
    fetch_all(base_url, headers, jql, "created,status,updated,resolutiondate", "changelog")
  end

  def fetch_all_issues_with_users(base_url, headers, project_key) do
    # Request the assignee and reporter fields
    fetch_all(base_url, headers, "project = #{project_key}", "assignee,reporter")
  end

  def fetch_all_closed_issues_with_worklogs(base_url, headers, project_key) do
    jql = "project = #{project_key} AND statusCategory = Done"
    # Request the worklog of each issue
    fetch_all(base_url, headers, jql, "worklog")
  end

  def fetch_all_issues_with_priorities(base_url, headers, project_key) do
    fetch_all(base_url, headers, "project = #{project_key}", "priority")
  end
end
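
A quick sanity check on a single page (a live request; the fetch_all_* functions simply repeat this call, advancing startAt by the number of issues received):

{:ok, page} = JiraFetcher.fetch_page(jira_url, headers, "project = #{kafka}", 0, "created,status")
IO.puts("Fetched #{length(page["issues"])} of #{page["total"]} issues")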

1. Time an issue spent from creation to closure

Build a histogram of the time each issue spent open (from creation to closure). The x-axis shows time; the y-axis shows the total number of issues that stayed open for that long. Only closed issues in the project are counted.
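
The metric is the plain difference between two ISO 8601 timestamps; a minimal worked example on made-up dates (formatted like the Jira timestamps used throughout):

{:ok, created, _} = DateTime.from_iso8601("2023-01-01T10:00:00.000+0000")
{:ok, resolved, _} = DateTime.from_iso8601("2023-01-11T10:00:00.000+0000")
DateTime.diff(resolved, created, :day) # => 10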

all_closed_issues_cache_path = Path.join(File.cwd!(), "all_closed_issues.json") 

all_closed_issues =
  if File.exists?(all_closed_issues_cache_path) do
    all_closed_issues_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_closed_issues_with_history(jira_url, headers, kafka)
    File.write!(all_closed_issues_cache_path, JSON.encode!(result))
    result
  end
all_closed_issues
|> length() |> IO.puts()

issue = Enum.at(all_closed_issues, 10)

# Map of lifespan (days) => number of issues with that lifespan
frequencies =
  all_closed_issues
  |> Enum.map(fn issue ->
    {:ok, created_at, _} = DateTime.from_iso8601(issue["fields"]["created"])
    {:ok, closed_at, _} = DateTime.from_iso8601(issue["fields"]["resolutiondate"])
    DateTime.diff(closed_at, created_at, :day)
  end)
  |> Enum.frequencies()

{min_lifespan, max_lifespan} = frequencies |> Map.keys() |> Enum.min_max()
x_data = min_lifespan..max_lifespan

y_data = Enum.map(x_data, &Map.get(frequencies, &1, 0))

first_chart_data = %{
  x_data: x_data,
  y_data: y_data
}
clip_after = 100

Vl.new(width: 700, height: 500, title: "Distribution of issue lifetimes in the open state")
|> Vl.data_from_values(first_chart_data, only: ["x_data", "y_data"])
|> Vl.mark(:bar)
# |> Vl.transform(filter: "datum.x_data <= #{clip_after}")
# |> Vl.encode_field(:x, "x_data", type: :quantitative, title: "Time open (days)", scale: [domain_min: 0, domain_max: clip_after, zero: true])
|> Vl.encode_field(:x, "x_data", type: :quantitative, title: "Time open (days)", scale: [type: :pow, exponent: 0.5, domain_min: 0, zero: true])
|> Vl.encode_field(:y, "y_data", type: :quantitative, title: "Number of issues")

2. Distribution of time issues spend in a given status

Build charts showing how issue time is distributed across states. The x-axis shows time; the y-axis shows the total number of issues that spent that long in the state. Only closed issues in the project are counted, and a separate chart is built for each state.

issues_with_history = all_closed_issues
defmodule IssueParser do
  # Parses an ISO 8601 timestamp field of an issue into a DateTime.
  def get_datetime!(issue, field_type) do
    {:ok, datetime, _} = DateTime.from_iso8601(issue["fields"][field_type])
    datetime
  end
end

defmodule StatusTimeCalculator do
  # An issue is in "Open" from its creation until the first transition out
  # of "Open", so this case is handled separately.
  def total_time_in_status(data, "Open") do
    opened_at = data["created"]

    changed_from_open =
      data["changes"]
      |> Enum.find(fn entry -> entry["fromString"] == "Open" end)
      |> get_in(["created"])

    DateTime.diff(changed_from_open, opened_at, :second)
  end

  def total_time_in_status(data, target_status) do
    # Extract the list of transitions from the "changes" key
    transitions = Map.get(data, "changes", [])

    transitions
    |> Enum.sort_by(& &1["created"], DateTime)
    |> find_time_spans(target_status)
    |> Enum.reduce(0, fn {start_time, end_time}, acc ->
      DateTime.diff(end_time, start_time, :second) + acc
    end)
  end

  def find_time_spans(sorted_transitions, target_status) do
    {_, spans} =
      Enum.reduce(sorted_transitions, {nil, []}, fn transition, {current_status, acc} ->
        cond do
          # Entering the target status: open a new span
          transition["toString"] == target_status ->
            {target_status, [{transition["created"], nil} | acc]}

          # Leaving the target status: close the most recent span
          transition["fromString"] == target_status ->
            {nil, update_last_span(acc, transition["created"])}

          true ->
            {current_status, acc}
        end
      end)

    # Keep only completed spans
    Enum.filter(spans, fn
      {_start, nil} -> false
      {_, _} -> true
    end)
  end

  defp update_last_span([], _end_time), do: []

  defp update_last_span([{start, nil} | rest], end_time) do
    [{start, end_time} | rest]
  end

  defp update_last_span([span | rest], end_time) do
    [span | update_last_span(rest, end_time)]
  end
end
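
A quick sketch of the span-pairing logic on hand-written transitions (the same shape the parser below produces):

demo_history = %{
  "created" => ~U[2023-01-01 00:00:00Z],
  "changes" => [
    %{"created" => ~U[2023-01-01 01:00:00Z], "fromString" => "Open", "toString" => "In Progress"},
    %{"created" => ~U[2023-01-01 03:00:00Z], "fromString" => "In Progress", "toString" => "Closed"}
  ]
}

StatusTimeCalculator.total_time_in_status(demo_history, "Open")        # => 3600
StatusTimeCalculator.total_time_in_status(demo_history, "In Progress") # => 7200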
IO.puts(Enum.count(issues_with_history))

# Check that the full change history was fetched for every issue.
# Answer: yes, every closed issue comes with its complete changelog.
# Enum.each(issues_with_history, fn issue ->
#   total = issue["changelog"]["total"]
#   loaded_total = issue["changelog"]["histories"] |> Enum.count()
#   if total != loaded_total do
#     false
#   end
# end)

defmodule ViewStatusTimeDistribution do
  # True if the issue's changelog contains a transition into the given status.
  def has_status(issue, status) do
    with %{"changelog" => %{"histories" => histories}} <- issue do
      Enum.any?(histories, fn history ->
        Enum.any?(history["items"], fn item ->
          item["field"] == "status" and item["toString"] == status
        end)
      end)
    else
      _ -> false
    end
  end

  def get_status_time_distribution(issues_with_history, status) do
    issues_with_desired_status =
      Enum.filter(issues_with_history, fn issue -> has_status(issue, status) end)

    frequencies =
      issues_with_desired_status
      |> Enum.map(fn issue ->
        # Flatten the changelog into a list of status transitions
        status_changes =
          issue["changelog"]["histories"]
          |> Enum.flat_map(fn entry ->
            entry["items"]
            |> Enum.filter(fn item ->
              item["fromString"] && item["toString"] && item["field"] == "status"
            end)
            |> Enum.map(fn item ->
              %{
                "created" => entry["created"],
                "fromString" => item["fromString"],
                "toString" => item["toString"]
              }
            end)
          end)
          |> Enum.map(fn change ->
            {:ok, from, _} = DateTime.from_iso8601(change["created"])
            %{change | "created" => from}
          end)

        issue_history = %{
          "created" => IssueParser.get_datetime!(issue, "created"),
          "changes" => status_changes
        }

        seconds_in_status = StatusTimeCalculator.total_time_in_status(issue_history, status)
        # Seconds -> whole days
        trunc(seconds_in_status / 86400)
      end)
      |> Enum.frequencies()

    {min_lifespan, max_lifespan} = frequencies |> Map.keys() |> Enum.min_max()
    x_data = min_lifespan..max_lifespan

    y_data = Enum.map(x_data, &Map.get(frequencies, &1, 0))

    second_chart_data = %{
      x_data: x_data,
      y_data: y_data
    }

    view(second_chart_data, status)
  end

  def view(chart_data, status) do
    clip_after = 100

    Vl.new(width: 700, height: 500, title: "Distribution of time issues spent in status #{status}")
    |> Vl.data_from_values(chart_data, only: ["x_data", "y_data"])
    |> Vl.mark(:bar)
    # |> Vl.transform(filter: "datum.x_data <= #{clip_after}")
    # |> Vl.encode_field(:x, "x_data", type: :quantitative, title: "Time in status (days)", scale: [domain_min: 0, domain_max: clip_after, zero: true])
    |> Vl.encode_field(:x, "x_data",
      type: :quantitative,
      title: "Time in status (days)",
      scale: [type: :pow, exponent: 0.5, domain_min: 0, zero: true]
    )
    |> Vl.encode_field(:y, "y_data", type: :quantitative, title: "Number of issues")
  end
end
statuses = [
  "Open",
  "In Progress",
  "Reopened",
  "Resolved",
  "Closed",
  "Patch Available"
]
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Open")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "In Progress")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Reopened")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Resolved")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Closed")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Patch Available")

3. Opened and closed issues per day, with cumulative totals

Build a chart showing the number of issues opened and closed per day. Besides the per-day counts, the chart must show a cumulative total. The x-axis shows calendar days; the y-axis shows the issue counts for each day (opened, closed, and a cumulative total for each), as sketched below.
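
The cumulative series is just a running sum over the per-day counts; Enum.scan/3 captures the idea that the reduce in the module below implements (toy numbers):

daily_counts = [2, 1, 3]
Enum.scan(daily_counts, 0, &(&1 + &2)) # => [2, 3, 6]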

all_issues_cache_path = Path.join(File.cwd!(), "all_issues.json") 

all_issues =
  if File.exists?(all_issues_cache_path) do
    all_issues_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_issues(jira_url, headers, kafka)
    File.write!(all_issues_cache_path, JSON.encode!(result))
    result
  end
Enum.count(all_issues) |> IO.puts # 18723. Yes we got them all

defmodule DailyTaskStats do
  def calculate_daily_stats(issues) do
    # Parse creation and resolution timestamps into ISO 8601 date strings
    issues
    |> Enum.map(fn issue ->
      {:ok, created_dt, _} = DateTime.from_iso8601(issue["fields"]["created"])
      created_date = created_dt |> DateTime.to_date() |> Date.to_iso8601()

      closed_date =
        case issue["fields"]["resolutiondate"] do
          nil ->
            nil

          date_str ->
            {:ok, closed_dt, _} = DateTime.from_iso8601(date_str)
            closed_dt |> DateTime.to_date() |> Date.to_iso8601()
        end

      %{created: created_date, closed: closed_date}
    end)
  end

  def aggregate_daily_data(stats) do
    # Produces per-day records shaped like:
    # %{
    #   date: "2011-08-10",
    #   created_daily: 1,
    #   closed_daily: 2,
    #   created_cumulative: 93,
    #   closed_cumulative: 56
    # }

    # Count created/closed issues per day
    daily_data =
      Enum.reduce(stats, %{}, fn stat, acc ->
        # Bump the created counter for the creation day
        created_count = Map.get(acc, stat.created, %{created: 0, closed: 0})
        created_count = %{created_count | created: created_count.created + 1}
        acc = Map.put(acc, stat.created, created_count)

        # If the issue is closed, bump the closed counter for that day
        if stat.closed do
          closed_count = Map.get(acc, stat.closed, %{created: 0, closed: 0})
          closed_count = %{closed_count | closed: closed_count.closed + 1}
          Map.put(acc, stat.closed, closed_count)
        else
          acc
        end
      end)

    # Sort the dates in ascending order
    sorted_dates = daily_data |> Map.keys() |> Enum.sort()

    # Compute the running totals
    {records, _cum_created, _cum_closed} =
      Enum.reduce(sorted_dates, {[], 0, 0}, fn date, {acc, cum_created, cum_closed} ->
        day_data = Map.get(daily_data, date)
        new_cum_created = cum_created + day_data.created
        new_cum_closed = cum_closed + day_data.closed

        day_record = %{
          date: date,
          created_daily: day_data.created,
          closed_daily: day_data.closed,
          created_cumulative: new_cum_created,
          closed_cumulative: new_cum_closed
        }

        {[day_record | acc], new_cum_created, new_cum_closed}
      end)

    # Return the records in chronological order
    Enum.reverse(records)
  end

  def create_separate_chart_data(daily_data) do
    # Split the data into daily and cumulative series for charting
    daily_series =
      Enum.flat_map(daily_data, fn day ->
        [
          %{date: day.date, value: day.created_daily, type: "Created"},
          %{date: day.date, value: day.closed_daily, type: "Closed"}
        ]
      end)

    cumulative_series =
      Enum.flat_map(daily_data, fn day ->
        [
          %{date: day.date, value: day.created_cumulative, type: "Created"},
          %{date: day.date, value: day.closed_cumulative, type: "Closed"}
        ]
      end)

    {daily_series, cumulative_series}
  end
end

stats = DailyTaskStats.calculate_daily_stats(all_issues)
daily_data = DailyTaskStats.aggregate_daily_data(stats)
{daily_series, cumulative_series} = DailyTaskStats.create_separate_chart_data(daily_data)

defmodule DateRangeFilter do
  # ISO 8601 date strings sort lexicographically, so plain string
  # comparison is sufficient here.
  def filter_by_date_range(list, start_date, end_date) do
    Enum.filter(list, fn %{date: date} ->
      date >= start_date and date <= end_date
    end)
  end
  
  def filter_two_arrays(array1, array2, start_date, end_date) do
    {
      filter_by_date_range(array1, start_date, end_date),
      filter_by_date_range(array2, start_date, end_date)
    }
  end
end

start_date = "2020-09-01"
finish_date = "2025-12-01"
{daily_series, cumulative_series} =
  DateRangeFilter.filter_two_arrays(daily_series, cumulative_series, start_date, finish_date)

max_cumulative_value = Enum.max_by(cumulative_series, &(&1.value)).value
min_cumulative_value = Enum.min_by(cumulative_series, &(&1.value)).value
Vl.new(width: 600, height: 300, title: ["Opened and closed issues per day with cumulative totals", "from #{start_date} to #{finish_date}"])
|> Vl.data_from_values(daily_series, only: ["date"])
|> Vl.encode_field(:x, "date", type: :temporal, title: "Day")
|> Vl.layers([
  Vl.new()
  |> Vl.data_from_values(daily_series, only: ["date", "value", "type"])
  |> Vl.mark(:rule)
  |> Vl.encode_field(:y, "value", type: :quantitative, title: "Issues per day")
  |> Vl.encode_field(:color, "type", type: :nominal, title: "Series"),
  Vl.new()
  |> Vl.data_from_values(cumulative_series, only: ["date", "value", "type"])
  |> Vl.mark(:line)
  |> Vl.encode_field(:y, "value", type: :quantitative, title: "Issues, cumulative") # , scale: [domain_min: min_cumulative_value, domain_max: max_cumulative_value, range_min: 160]
  |> Vl.encode_field(:color, "type", type: :nominal, title: "Series")
])
|> Vl.resolve(:scale, y: :independent)

4. Most active users

Build a chart of the total number of issues in which each user appears as assignee or reporter. The x-axis shows the number of issues; the y-axis shows the user name. Show the top 30 users with the highest issue counts.

all_issues_with_users_cache_path = Path.join(File.cwd!(), "all_issues_with_users.json") 

all_issues_with_users =
  if File.exists?(all_issues_with_users_cache_path) do
    all_issues_with_users_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_issues_with_users(jira_url, headers, kafka)
    File.write!(all_issues_with_users_cache_path, JSON.encode!(result))
    result
  end
defmodule UserTaskStats do
  def calculate_user_stats(issues) do
    # Build per-user statistics
    Enum.reduce(issues, %{}, fn issue, acc ->
      # Resolve keys for the assignee and the reporter
      assignee_key = get_user_key(issue["fields"]["assignee"])
      reporter_key = get_user_key(issue["fields"]["reporter"])

      if assignee_key == reporter_key do
        update_user_stats(acc, assignee_key, :both_count)
      else
        acc =
          if assignee_key do
            update_user_stats(acc, assignee_key, :assignee_count)
          else
            acc
          end

        if reporter_key do
          update_user_stats(acc, reporter_key, :reporter_count)
        else
          acc
        end
      end
    end)
  end

  defp get_user_key(user_data) do
    case user_data do
      %{"key" => key, "displayName" => name} -> 
        {key, name}
      %{"key" => key} -> 
        {key, key}
      %{"displayName" => name} -> 
        {name, name}
      _ ->
        nil
    end
  end

  defp update_user_stats(acc, {user_key, user_name}, type) do
    current =
      Map.get(acc, user_key, %{name: user_name, assignee_count: 0, reporter_count: 0, total: 0})

    updated =
      case type do
        :assignee_count ->
          %{current | assignee_count: current.assignee_count + 1}

        :reporter_count ->
          %{current | reporter_count: current.reporter_count + 1}

        :both_count ->
          %{
            current
            | assignee_count: current.assignee_count + 1,
              reporter_count: current.reporter_count + 1
          }
      end

    new_total =
      case type do
        :both_count -> updated.assignee_count + updated.reporter_count - 1
        _ -> updated.assignee_count + updated.reporter_count
      end

    updated = %{updated | total: new_total}

    Map.put(acc, user_key, updated)
  end

  def get_top_users(user_stats, limit \\ 30) do
    user_stats
    |> Map.values()
    |> Enum.sort_by(& &1.total, :desc)
    |> Enum.take(limit)
  end
end
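
Note the :both_count branch above: when the same user is assignee and reporter, the issue contributes to both counters but only once to total. A minimal check with a hypothetical user:

demo_issue = %{
  "fields" => %{
    "assignee" => %{"key" => "u1", "displayName" => "User One"},
    "reporter" => %{"key" => "u1", "displayName" => "User One"}
  }
}

UserTaskStats.calculate_user_stats([demo_issue])
# => %{"u1" => %{name: "User One", assignee_count: 1, reporter_count: 1, total: 1}}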
Enum.count(all_issues_with_users) |> IO.puts
valid_all_issues_with_users = Enum.filter(all_issues_with_users, fn issue ->
  reporter = issue["fields"]["reporter"]
  assignee = issue["fields"]["assignee"]
  reporter || assignee
end)
user_stats = UserTaskStats.calculate_user_stats(valid_all_issues_with_users)
top_users = UserTaskStats.get_top_users(user_stats)
Vl.new(width: 500, title: "Top 30 users by number of issues")
|> Vl.data_from_values(top_users, only: ["total", "name"])
|> Vl.mark(:bar)
|> Vl.encode_field(:color, "total", type: :quantitative)
|> Vl.encode_field(:x, "total", type: :quantitative, title: "Number of issues")
|> Vl.encode_field(:y, "name", type: :nominal, title: "User",
                   sort: [field: "total", order: "descending"])

5. Time users spent working on issues

Build a histogram of the time users spent completing issues, based on logged work. The x-axis shows time; the y-axis shows the total number of issues matching that time. Only closed issues in the project are counted.
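
Each issue is expected to carry its logged work under fields.worklog.worklogs; the analyzer below assumes entries shaped roughly like this (illustrative values, trimmed):

%{
  "fields" => %{
    "worklog" => %{
      "worklogs" => [
        %{"author" => %{"displayName" => "Jane Doe"}, "timeSpentSeconds" => 3600}
      ]
    }
  }
}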

issues_with_worklogs_cache_path = Path.join(File.cwd!(), "issues_with_worklogs.json") 

issues_with_worklogs =
  if File.exists?(issues_with_worklogs_cache_path) do
    issues_with_worklogs_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_closed_issues_with_worklogs(jira_url, headers, kafka)
    File.write!(issues_with_worklogs_cache_path, JSON.encode!(result))
    result
  end
IO.puts("Загружено задач: #{Enum.count(issues_with_worklogs)}")
Enum.random(issues_with_worklogs)
issues_with_non_empty_worklogs = Enum.filter(issues_with_worklogs, fn issue ->
  Enum.count(issue["fields"]["worklog"]["worklogs"]) > 0
end)
IO.puts("#{Enum.count(issues_with_non_empty_worklogs)} issues with worklogs")
# Анализируем worklog для каждой задачи
defmodule WorklogAnalyzer do
  def calculate_user_time_per_issue(issues) do
    Enum.flat_map(issues, fn issue ->
      %{"worklogs" => worklogs} = issue["fields"]["worklog"]

      # Group logged time by user within this issue
      worklogs
      |> Enum.group_by(fn worklog ->
        worklog["author"]["displayName"] || worklog["author"]["name"]
      end)
      |> Enum.map(fn {user, logs} ->
        total_seconds =
          Enum.reduce(logs, 0, fn log, acc ->
            acc + (log["timeSpentSeconds"] || 0)
          end)

        %{
          issue_key: issue["key"],
          user: user,
          total_seconds: total_seconds,
          total_hours: total_seconds / 3600,
          worklogs_count: length(logs)
        }
      end)
    end)
  end

  def filter_by_min_time(worklog_data, min_seconds \\ 300) do
    Enum.filter(worklog_data, fn item ->
      item[:total_seconds] >= min_seconds
    end)
  end
end

# Collect per-user time data
worklog_data = WorklogAnalyzer.calculate_user_time_per_issue(issues_with_non_empty_worklogs)

IO.puts("Worklog records found: #{Enum.count(worklog_data)}")

# Drop records with very little logged time (under 5 minutes)
filtered_worklog_data = WorklogAnalyzer.filter_by_min_time(worklog_data, 300)

IO.puts("After filtering (≥5 minutes): #{Enum.count(filtered_worklog_data)}")

filtered_worklog_data
issues_per_total_hours =
  filtered_worklog_data
  |> Enum.map(& &1.total_hours)
  |> Enum.frequencies()

data_list = Enum.map(issues_per_total_hours, fn {key, value} -> %{hours: key, issues: value} end)
Vl.new(width: 250, title: "Time users spent on issues")
|> Vl.data_from_values(data_list, only: ["hours", "issues"])
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "hours", type: :quantitative, title: "Hours logged")
|> Vl.encode_field(:y, "issues", type: :quantitative, title: "Number of issues", scale: [type: :linear])

6. Issues by priority

Build a chart of the number of issues by severity.

# Peek at which fields are available on an issue
ran = Enum.random(all_issues)

ran["fields"] |> Map.keys()

all_issues_with_priority_cache_path = Path.join(File.cwd!(), "all_issues_with_priority.json") 

all_issues_with_priority =
  if File.exists?(all_issues_with_priority_cache_path) do
    all_issues_with_priority_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_issues_with_priorities(jira_url, headers, kafka)
    File.write!(all_issues_with_priority_cache_path, JSON.encode!(result))
    result
  end
IO.puts("Загружено задач с информацией о приоритете: #{Enum.count(all_issues_with_priority)}")
Enum.random(all_issues_with_priority)
defmodule PriorityAnalyzer do
  def calculate_priority_distribution(issues) do
    # Build per-priority statistics
    Enum.reduce(issues, %{}, fn issue, acc ->
      priority_data = issue["fields"]["priority"]

      if priority_data do
        priority_name = priority_data["name"] || "Unknown"
        priority_id = priority_data["id"] || "unknown"

        # Group by id, but display the name
        current = Map.get(acc, priority_id, %{
          id: priority_id,
          name: priority_name,
          count: 0
        })

        Map.put(acc, priority_id, %{current | count: current.count + 1})
      else
        # Issues without a priority
        current = Map.get(acc, "no_priority", %{
          id: "no_priority",
          name: "No Priority",
          count: 0
        })

        Map.put(acc, "no_priority", %{current | count: current.count + 1})
      end
    end)
  end

  def get_sorted_priorities(priority_stats) do
    # Sort by issue count, descending
    priority_stats
    |> Map.values()
    |> Enum.sort_by(& &1.count, :desc)
  end

  def create_chart_data(priority_stats) do
    get_sorted_priorities(priority_stats)
  end
end
sixth_chart_data =
  all_issues_with_priority
  |> PriorityAnalyzer.calculate_priority_distribution
  |> PriorityAnalyzer.create_chart_data
Vl.new(width: 500, title: "Issues by priority")
|> Vl.data_from_values(sixth_chart_data, only: ["name", "count", "id"])
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "name", type: :nominal,
                   sort: [field: "id", order: "descending"], title: "Priority",
                   axis: [label_angle: 0])
|> Vl.encode_field(:y, "count", type: :quantitative, title: "Count")

Testing

ExUnit.start(autorun: false)

defmodule JiraAnalyticsTest do
  use ExUnit.Case, async: true
  
  describe "IssueParser" do
    test "get_datetime!/1 parses ISO8601 correctly" do
      issue = %{
        "fields" => %{
          "created" => "2023-01-01T10:00:00.000+0000"
        }
      }

      datetime = IssueParser.get_datetime!(issue, "created")

      assert %DateTime{
        year: 2023,
        month: 1,
        day: 1,
        hour: 10,
        minute: 0,
        second: 0
      } = datetime
    end
  end

  describe "DateRangeFilter" do
    test "filter_by_date_range/3 filters data correctly" do
      data = [
        %{date: "2023-01-01", value: 1},
        %{date: "2023-01-15", value: 2},
        %{date: "2023-02-01", value: 3}
      ]

      result = DateRangeFilter.filter_by_date_range(data, "2023-01-01", "2023-01-31")

      assert length(result) == 2
      assert Enum.all?(result, fn %{date: date} -> 
        date >= "2023-01-01" and date <= "2023-01-31"
      end)
    end

    test "filter_two_arrays/4 filters both arrays" do
      array1 = [
        %{date: "2023-01-01", value: 1},
        %{date: "2023-02-01", value: 2}
      ]
      
      array2 = [
        %{date: "2023-01-01", value: 3},
        %{date: "2023-02-01", value: 4}
      ]

      {filtered1, filtered2} = DateRangeFilter.filter_two_arrays(
        array1, array2, "2023-01-01", "2023-01-31"
      )

      assert length(filtered1) == 1
      assert length(filtered2) == 1
      assert Enum.at(filtered1, 0).date == "2023-01-01"
      assert Enum.at(filtered2, 0).date == "2023-01-01"
    end
  end
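
  # A small additional check for DailyTaskStats (a sketch using the module
  # defined above): two issues created on the same day, one closed the next
  # day; the cumulative columns must be running sums of the daily columns.
  describe "DailyTaskStats" do
    test "aggregate_daily_data/1 accumulates running totals" do
      stats = [
        %{created: "2023-01-01", closed: "2023-01-02"},
        %{created: "2023-01-01", closed: nil}
      ]

      [day1, day2] = DailyTaskStats.aggregate_daily_data(stats)

      assert day1.created_daily == 2
      assert day1.created_cumulative == 2
      assert day2.closed_daily == 1
      assert day2.closed_cumulative == 1
    end
  end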


  describe "StatusTimeCalculator" do
    test "total_time_in_status/2 calculates time for Open status" do
      data = %{
        "created" => ~U[2023-01-01 00:00:00Z],
        "changes" => [
          %{"created" => ~U[2023-01-01 02:00:00Z], "fromString" => "Open", "toString" => "In Progress"}
        ]
      }

      result = StatusTimeCalculator.total_time_in_status(data, "Open")
      
      assert result == 7200 # 2 hours in seconds
    end

    test "total_time_in_status/2 calculates time for other statuses" do
      data = %{
        "created" => ~U[2023-01-01 00:00:00Z],
        "changes" => [
          %{"created" => ~U[2023-01-01 01:00:00Z], "fromString" => "Open", "toString" => "In Progress"},
          %{"created" => ~U[2023-01-01 03:00:00Z], "fromString" => "In Progress", "toString" => "Resolved"},
          %{"created" => ~U[2023-01-01 04:00:00Z], "fromString" => "Resolved", "toString" => "In Progress"},
          %{"created" => ~U[2023-01-01 05:00:00Z], "fromString" => "In Progress", "toString" => "Closed"}
        ]
      }

      result = StatusTimeCalculator.total_time_in_status(data, "In Progress")
      
      # First period: 01:00 to 03:00 = 2 hours
      # Second period: 04:00 to 05:00 = 1 hour
      # Total: 3 hours = 10800 seconds
      assert result == 10800
    end

    test "find_time_spans/2 finds correct time spans" do
      transitions = [
        %{"created" => ~U[2023-01-01 01:00:00Z], "fromString" => "Open", "toString" => "In Progress"},
        %{"created" => ~U[2023-01-01 03:00:00Z], "fromString" => "In Progress", "toString" => "Resolved"},
        %{"created" => ~U[2023-01-01 04:00:00Z], "fromString" => "Resolved", "toString" => "In Progress"},
        %{"created" => ~U[2023-01-01 05:00:00Z], "fromString" => "In Progress", "toString" => "Closed"}
      ]

      result = StatusTimeCalculator.find_time_spans(transitions, "In Progress")
      
      assert result == [
        {~U[2023-01-01 04:00:00Z], ~U[2023-01-01 05:00:00Z]},
        {~U[2023-01-01 01:00:00Z], ~U[2023-01-01 03:00:00Z]}
      ]
    end
  end

  describe "WorklogAnalyzer" do
    test "calculate_user_time_per_issue/1 groups worklogs by user" do
      issues = [
        %{
          "key" => "TEST-1",
          "fields" => %{
            "worklog" => %{
              "worklogs" => [
                %{
                  "author" => %{"displayName" => "User1"},
                  "timeSpentSeconds" => 3600
                },
                %{
                  "author" => %{"displayName" => "User2"},
                  "timeSpentSeconds" => 1800
                },
                %{
                  "author" => %{"displayName" => "User1"},
                  "timeSpentSeconds" => 1800
                }
              ]
            }
          }
        }
      ]

      result = WorklogAnalyzer.calculate_user_time_per_issue(issues)

      assert [
        %{
          issue_key: "TEST-1",
          user: "User1",
          total_seconds: 5400,
          total_hours: 1.5,
          worklogs_count: 2
        },
        %{
          issue_key: "TEST-1",
          user: "User2",
          total_seconds: 1800,
          total_hours: 0.5,
          worklogs_count: 1
        }
      ] = result
    end

    test "filter_by_min_time/2 filters correctly" do
      worklog_data = [
        %{total_seconds: 300, total_hours: 0.083},
        %{total_seconds: 299, total_hours: 0.083},
        %{total_seconds: 600, total_hours: 0.167}
      ]

      result = WorklogAnalyzer.filter_by_min_time(worklog_data, 300)

      assert length(result) == 2
      refute Enum.any?(result, fn item -> item.total_seconds == 299 end)
    end
  end
end


ExUnit.run()