
Tasks analysis

Mix.install([
  {:merquery, "~> 0.3.0"},
  {:req, "~> 0.5.16"},
  {:kino, "~> 0.18.0"},
  {:vega_lite, "~> 0.1.11"},
  {:kino_vega_lite, "~> 0.1.13"},
])

Introduction

File.cwd!

Elixir and the Livebook project were chosen for building this task-analytics tool.

The VegaLite library is used for visualization.

alias VegaLite, as: Vl
IO.puts "Hello, World"

The following libraries are added to the project for making HTTP requests:

  • req — an HTTP client
  • merquery — a smart-cell wrapper around req
{:ok, %{status: st}} = Req.get("https://api.github.com/repos/elixir-lang/elixir")
st

0. Parameters shared by all requests. The Jira data-loading module

jira_url = "https://issues.apache.org/jira/rest/api/2"
kafka = "KAFKA"
headers = [{"Accept", "application/json"}]

"#{jira_url}/project/#{kafka}"
defmodule JiraFetcher do
  # Fetches a single page from the Jira search endpoint (100 issues max).
  def fetch_page(base_url, headers, jql, start_at, fields, expand \\ nil) do
    url = "#{base_url}/search"

    params = [
      jql: jql,
      startAt: start_at,
      maxResults: 100,
      fields: fields
    ]

    params = if expand, do: Keyword.put(params, :expand, expand), else: params

    case Req.get(url, headers: headers, params: params) do
      {:ok, %{status: 200, body: body}} ->
        {:ok, body}

      {:ok, response} ->
        IO.inspect(response, label: "Request failed")
        {:error, response}

      {:error, reason} ->
        IO.inspect(reason, label: "Transport error")
        {:error, reason}
    end
  end

  # Shared pagination: repeats fetch_page/6, advancing startAt by the number
  # of issues received, until the total reported by Jira is reached.
  defp fetch_all(base_url, headers, jql, fields, expand \\ nil) do
    Stream.resource(
      fn -> 0 end,
      fn start_at ->
        case fetch_page(base_url, headers, jql, start_at, fields, expand) do
          {:ok, %{"issues" => issues, "total" => total}} ->
            if issues == [] or start_at >= total do
              {:halt, start_at}
            else
              {issues, start_at + length(issues)}
            end

          _ ->
            {:halt, start_at}
        end
      end,
      fn _ -> :ok end
    )
    |> Enum.to_list()
  end

  def fetch_all_closed_issues_with_history(base_url, headers, project_key) do
    jql = "project = #{project_key} AND statusCategory = Done"
    fetch_all(base_url, headers, jql, "created,status,updated,resolutiondate", "changelog")
  end

  def fetch_all_issues(base_url, headers, project_key) do
    jql = "project = #{project_key}"
    fetch_all(base_url, headers, jql, "created,status,updated,resolutiondate", "changelog")
  end

  def fetch_all_issues_with_users(base_url, headers, project_key) do
    # Request the assignee and reporter fields
    fetch_all(base_url, headers, "project = #{project_key}", "assignee,reporter")
  end

  def fetch_all_closed_issues_with_worklogs(base_url, headers, project_key) do
    jql = "project = #{project_key} AND statusCategory = Done"
    # Request the worklog of each issue
    fetch_all(base_url, headers, jql, "worklog")
  end

  def fetch_all_issues_with_priorities(base_url, headers, project_key) do
    fetch_all(base_url, headers, "project = #{project_key}", "priority")
  end
end
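
A quick sanity check on a single page (a live request; the fetch_all_* functions simply repeat this call, advancing startAt by the number of issues received):

{:ok, page} = JiraFetcher.fetch_page(jira_url, headers, "project = #{kafka}", 0, "created,status")
IO.puts("Fetched #{length(page["issues"])} of #{page["total"]} issues")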

1. Time an issue spent from creation to closure

Build a histogram of the time each issue spent open (from creation to closure). The x-axis shows time; the y-axis shows the total number of issues that stayed open for that long. Only closed issues in the project are counted.
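
The metric is the plain difference between two ISO 8601 timestamps; a minimal worked example on made-up dates (formatted like the Jira timestamps used throughout):

{:ok, created, _} = DateTime.from_iso8601("2023-01-01T10:00:00.000+0000")
{:ok, resolved, _} = DateTime.from_iso8601("2023-01-11T10:00:00.000+0000")
DateTime.diff(resolved, created, :day) # => 10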

all_closed_issues_cache_path = Path.join(File.cwd!(), "all_closed_issues.json") 

all_closed_issues =
  if File.exists?(all_closed_issues_cache_path) do
    all_closed_issues_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_closed_issues_with_history(jira_url, headers, kafka)
    File.write!(all_closed_issues_cache_path, JSON.encode!(result))
    result
  end
all_closed_issues
|> length() |> IO.puts()

issue = Enum.at(all_closed_issues, 10)

# Map of lifespan (days) => number of issues with that lifespan
frequencies =
  all_closed_issues
  |> Enum.map(fn issue ->
    {:ok, created_at, _} = DateTime.from_iso8601(issue["fields"]["created"])
    {:ok, closed_at, _} = DateTime.from_iso8601(issue["fields"]["resolutiondate"])
    DateTime.diff(closed_at, created_at, :day)
  end)
  |> Enum.frequencies()

{min_lifespan, max_lifespan} = frequencies |> Map.keys() |> Enum.min_max()
x_data = min_lifespan..max_lifespan

y_data = Enum.map(x_data, &Map.get(frequencies, &1, 0))

first_chart_data = %{
  x_data: x_data,
  y_data: y_data
}
clip_after = 100

Vl.new(width: 700, height: 500, title: "Distribution of issue lifetimes in the open state")
|> Vl.data_from_values(first_chart_data, only: ["x_data", "y_data"])
|> Vl.mark(:bar)
# |> Vl.transform(filter: "datum.x_data <= #{clip_after}")
# |> Vl.encode_field(:x, "x_data", type: :quantitative, title: "Time open (days)", scale: [domain_min: 0, domain_max: clip_after, zero: true])
|> Vl.encode_field(:x, "x_data", type: :quantitative, title: "Time open (days)", scale: [type: :pow, exponent: 0.5, domain_min: 0, zero: true])
|> Vl.encode_field(:y, "y_data", type: :quantitative, title: "Number of issues")

2. Distribution of time issues spend in a given status

Build charts showing how issue time is distributed across states. The x-axis shows time; the y-axis shows the total number of issues that spent that long in the state. Only closed issues in the project are counted, and a separate chart is built for each state.

issues_with_history = all_closed_issues
defmodule IssueParser do
  # Parses an ISO 8601 timestamp field of an issue into a DateTime.
  def get_datetime!(issue, field_type) do
    {:ok, datetime, _} = DateTime.from_iso8601(issue["fields"][field_type])
    datetime
  end
end

defmodule StatusTimeCalculator do
  # An issue is in "Open" from its creation until the first transition out
  # of "Open", so this case is handled separately.
  def total_time_in_status(data, "Open") do
    opened_at = data["created"]

    changed_from_open =
      data["changes"]
      |> Enum.find(fn entry -> entry["fromString"] == "Open" end)
      |> get_in(["created"])

    DateTime.diff(changed_from_open, opened_at, :second)
  end

  def total_time_in_status(data, target_status) do
    # Extract the list of transitions from the "changes" key
    transitions = Map.get(data, "changes", [])

    transitions
    |> Enum.sort_by(& &1["created"], DateTime)
    |> find_time_spans(target_status)
    |> Enum.reduce(0, fn {start_time, end_time}, acc ->
      DateTime.diff(end_time, start_time, :second) + acc
    end)
  end

  def find_time_spans(sorted_transitions, target_status) do
    {_, spans} =
      Enum.reduce(sorted_transitions, {nil, []}, fn transition, {current_status, acc} ->
        cond do
          # Entering the target status: open a new span
          transition["toString"] == target_status ->
            {target_status, [{transition["created"], nil} | acc]}

          # Leaving the target status: close the most recent span
          transition["fromString"] == target_status ->
            {nil, update_last_span(acc, transition["created"])}

          true ->
            {current_status, acc}
        end
      end)

    # Keep only completed spans
    Enum.filter(spans, fn
      {_start, nil} -> false
      {_, _} -> true
    end)
  end

  defp update_last_span([], _end_time), do: []

  defp update_last_span([{start, nil} | rest], end_time) do
    [{start, end_time} | rest]
  end

  defp update_last_span([span | rest], end_time) do
    [span | update_last_span(rest, end_time)]
  end
end
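
A quick sketch of the span-pairing logic on hand-written transitions (the same shape the parser below produces):

demo_history = %{
  "created" => ~U[2023-01-01 00:00:00Z],
  "changes" => [
    %{"created" => ~U[2023-01-01 01:00:00Z], "fromString" => "Open", "toString" => "In Progress"},
    %{"created" => ~U[2023-01-01 03:00:00Z], "fromString" => "In Progress", "toString" => "Closed"}
  ]
}

StatusTimeCalculator.total_time_in_status(demo_history, "Open")        # => 3600
StatusTimeCalculator.total_time_in_status(demo_history, "In Progress") # => 7200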
IO.puts(Enum.count(issues_with_history))

# Check that the full change history was fetched for every issue.
# Answer: yes, every closed issue comes with its complete changelog.
# Enum.each(issues_with_history, fn issue ->
#   total = issue["changelog"]["total"]
#   loaded_total = issue["changelog"]["histories"] |> Enum.count()
#   if total != loaded_total do
#     false
#   end
# end)

defmodule ViewStatusTimeDistribution do
  # True if the issue's changelog contains a transition into the given status.
  def has_status(issue, status) do
    with %{"changelog" => %{"histories" => histories}} <- issue do
      Enum.any?(histories, fn history ->
        Enum.any?(history["items"], fn item ->
          item["field"] == "status" and item["toString"] == status
        end)
      end)
    else
      _ -> false
    end
  end

  def get_status_time_distribution(issues_with_history, status) do
    issues_with_desired_status =
      Enum.filter(issues_with_history, fn issue -> has_status(issue, status) end)

    frequencies =
      issues_with_desired_status
      |> Enum.map(fn issue ->
        # Flatten the changelog into a list of status transitions
        status_changes =
          issue["changelog"]["histories"]
          |> Enum.flat_map(fn entry ->
            entry["items"]
            |> Enum.filter(fn item ->
              item["fromString"] && item["toString"] && item["field"] == "status"
            end)
            |> Enum.map(fn item ->
              %{
                "created" => entry["created"],
                "fromString" => item["fromString"],
                "toString" => item["toString"]
              }
            end)
          end)
          |> Enum.map(fn change ->
            {:ok, from, _} = DateTime.from_iso8601(change["created"])
            %{change | "created" => from}
          end)

        issue_history = %{
          "created" => IssueParser.get_datetime!(issue, "created"),
          "changes" => status_changes
        }

        seconds_in_status = StatusTimeCalculator.total_time_in_status(issue_history, status)
        # Seconds -> whole days
        trunc(seconds_in_status / 86400)
      end)
      |> Enum.frequencies()

    {min_lifespan, max_lifespan} = frequencies |> Map.keys() |> Enum.min_max()
    x_data = min_lifespan..max_lifespan

    y_data = Enum.map(x_data, &Map.get(frequencies, &1, 0))

    second_chart_data = %{
      x_data: x_data,
      y_data: y_data
    }

    view(second_chart_data, status)
  end

  def view(chart_data, status) do
    clip_after = 100

    Vl.new(width: 700, height: 500, title: "Distribution of time issues spent in status #{status}")
    |> Vl.data_from_values(chart_data, only: ["x_data", "y_data"])
    |> Vl.mark(:bar)
    # |> Vl.transform(filter: "datum.x_data <= #{clip_after}")
    # |> Vl.encode_field(:x, "x_data", type: :quantitative, title: "Time in status (days)", scale: [domain_min: 0, domain_max: clip_after, zero: true])
    |> Vl.encode_field(:x, "x_data",
      type: :quantitative,
      title: "Time in status (days)",
      scale: [type: :pow, exponent: 0.5, domain_min: 0, zero: true]
    )
    |> Vl.encode_field(:y, "y_data", type: :quantitative, title: "Number of issues")
  end
end
statuses = [
  "Open",
  "In Progress",
  "Reopened",
  "Resolved",
  "Closed",
  "Patch Available"
]
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Open")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "In Progress")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Reopened")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Resolved")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Closed")
ViewStatusTimeDistribution.get_status_time_distribution(issues_with_history, "Patch Available")

3. Opened and closed issues per day, with cumulative totals

Build a chart showing the number of issues opened and closed per day. Besides the per-day counts, the chart must show a cumulative total. The x-axis shows calendar days; the y-axis shows the issue counts for each day (opened, closed, and a cumulative total for each), as sketched below.
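
The cumulative series is just a running sum over the per-day counts; Enum.scan/3 captures the idea that the reduce in the module below implements (toy numbers):

daily_counts = [2, 1, 3]
Enum.scan(daily_counts, 0, &(&1 + &2)) # => [2, 3, 6]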

all_issues_cache_path = Path.join(File.cwd!(), "all_issues.json") 

all_issues =
  if File.exists?(all_issues_cache_path) do
    all_issues_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_issues(jira_url, headers, kafka)
    File.write!(all_issues_cache_path, JSON.encode!(result))
    result
  end
Enum.count(all_issues) |> IO.puts # 18723. Yes we got them all

defmodule DailyTaskStats do
  def calculate_daily_stats(issues) do
    # Parse creation and resolution timestamps into ISO 8601 date strings
    issues
    |> Enum.map(fn issue ->
      {:ok, created_dt, _} = DateTime.from_iso8601(issue["fields"]["created"])
      created_date = created_dt |> DateTime.to_date() |> Date.to_iso8601()

      closed_date =
        case issue["fields"]["resolutiondate"] do
          nil ->
            nil

          date_str ->
            {:ok, closed_dt, _} = DateTime.from_iso8601(date_str)
            closed_dt |> DateTime.to_date() |> Date.to_iso8601()
        end

      %{created: created_date, closed: closed_date}
    end)
  end

  def aggregate_daily_data(stats) do
    # Produces per-day records shaped like:
    # %{
    #   date: "2011-08-10",
    #   created_daily: 1,
    #   closed_daily: 2,
    #   created_cumulative: 93,
    #   closed_cumulative: 56
    # }

    # Count created/closed issues per day
    daily_data =
      Enum.reduce(stats, %{}, fn stat, acc ->
        # Bump the created counter for the creation day
        created_count = Map.get(acc, stat.created, %{created: 0, closed: 0})
        created_count = %{created_count | created: created_count.created + 1}
        acc = Map.put(acc, stat.created, created_count)

        # If the issue is closed, bump the closed counter for that day
        if stat.closed do
          closed_count = Map.get(acc, stat.closed, %{created: 0, closed: 0})
          closed_count = %{closed_count | closed: closed_count.closed + 1}
          Map.put(acc, stat.closed, closed_count)
        else
          acc
        end
      end)

    # Sort the dates in ascending order
    sorted_dates = daily_data |> Map.keys() |> Enum.sort()

    # Compute the running totals
    {records, _cum_created, _cum_closed} =
      Enum.reduce(sorted_dates, {[], 0, 0}, fn date, {acc, cum_created, cum_closed} ->
        day_data = Map.get(daily_data, date)
        new_cum_created = cum_created + day_data.created
        new_cum_closed = cum_closed + day_data.closed

        day_record = %{
          date: date,
          created_daily: day_data.created,
          closed_daily: day_data.closed,
          created_cumulative: new_cum_created,
          closed_cumulative: new_cum_closed
        }

        {[day_record | acc], new_cum_created, new_cum_closed}
      end)

    # Return the records in chronological order
    Enum.reverse(records)
  end

  def create_separate_chart_data(daily_data) do
    # Split the data into daily and cumulative series for charting
    daily_series =
      Enum.flat_map(daily_data, fn day ->
        [
          %{date: day.date, value: day.created_daily, type: "Created"},
          %{date: day.date, value: day.closed_daily, type: "Closed"}
        ]
      end)

    cumulative_series =
      Enum.flat_map(daily_data, fn day ->
        [
          %{date: day.date, value: day.created_cumulative, type: "Created"},
          %{date: day.date, value: day.closed_cumulative, type: "Closed"}
        ]
      end)

    {daily_series, cumulative_series}
  end
end

stats = DailyTaskStats.calculate_daily_stats(all_issues)
daily_data = DailyTaskStats.aggregate_daily_data(stats)
{daily_series, cumulative_series} = DailyTaskStats.create_separate_chart_data(daily_data)

defmodule DateRangeFilter do
  # ISO 8601 date strings sort lexicographically, so plain string
  # comparison is sufficient here.
  def filter_by_date_range(list, start_date, end_date) do
    Enum.filter(list, fn %{date: date} ->
      date >= start_date and date <= end_date
    end)
  end
  
  def filter_two_arrays(array1, array2, start_date, end_date) do
    {
      filter_by_date_range(array1, start_date, end_date),
      filter_by_date_range(array2, start_date, end_date)
    }
  end
end

start_date = "2020-09-01"
finish_date = "2025-12-01"
{daily_series, cumulative_series} =
  DateRangeFilter.filter_two_arrays(daily_series, cumulative_series, start_date, finish_date)

max_cumulative_value = Enum.max_by(cumulative_series, &(&1.value)).value
min_cumulative_value = Enum.min_by(cumulative_series, &(&1.value)).value
Vl.new(width: 600, height: 300, title: ["Opened and closed issues per day with cumulative totals", "from #{start_date} to #{finish_date}"])
|> Vl.data_from_values(daily_series, only: ["date"])
|> Vl.encode_field(:x, "date", type: :temporal, title: "Day")
|> Vl.layers([
  Vl.new()
  |> Vl.data_from_values(daily_series, only: ["date", "value", "type"])
  |> Vl.mark(:rule)
  |> Vl.encode_field(:y, "value", type: :quantitative, title: "Issues per day")
  |> Vl.encode_field(:color, "type", type: :nominal, title: "Series"),
  Vl.new()
  |> Vl.data_from_values(cumulative_series, only: ["date", "value", "type"])
  |> Vl.mark(:line)
  |> Vl.encode_field(:y, "value", type: :quantitative, title: "Issues, cumulative") # , scale: [domain_min: min_cumulative_value, domain_max: max_cumulative_value, range_min: 160]
  |> Vl.encode_field(:color, "type", type: :nominal, title: "Series")
])
|> Vl.resolve(:scale, y: :independent)

4. Most active users

Build a chart of the total number of issues in which each user appears as assignee or reporter. The x-axis shows the number of issues; the y-axis shows the user name. Show the top 30 users with the highest issue counts.

all_issues_with_users_cache_path = Path.join(File.cwd!(), "all_issues_with_users.json") 

all_issues_with_users =
  if File.exists?(all_issues_with_users_cache_path) do
    all_issues_with_users_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_issues_with_users(jira_url, headers, kafka)
    File.write!(all_issues_with_users_cache_path, JSON.encode!(result))
    result
  end
defmodule UserTaskStats do
  def calculate_user_stats(issues) do
    # Build per-user statistics
    Enum.reduce(issues, %{}, fn issue, acc ->
      # Resolve keys for the assignee and the reporter
      assignee_key = get_user_key(issue["fields"]["assignee"])
      reporter_key = get_user_key(issue["fields"]["reporter"])

      if assignee_key == reporter_key do
        update_user_stats(acc, assignee_key, :both_count)
      else
        acc =
          if assignee_key do
            update_user_stats(acc, assignee_key, :assignee_count)
          else
            acc
          end

        if reporter_key do
          update_user_stats(acc, reporter_key, :reporter_count)
        else
          acc
        end
      end
    end)
  end

  defp get_user_key(user_data) do
    case user_data do
      %{"key" => key, "displayName" => name} -> 
        {key, name}
      %{"key" => key} -> 
        {key, key}
      %{"displayName" => name} -> 
        {name, name}
      _ ->
        nil
    end
  end

  defp update_user_stats(acc, {user_key, user_name}, type) do
    current =
      Map.get(acc, user_key, %{name: user_name, assignee_count: 0, reporter_count: 0, total: 0})

    updated =
      case type do
        :assignee_count ->
          %{current | assignee_count: current.assignee_count + 1}

        :reporter_count ->
          %{current | reporter_count: current.reporter_count + 1}

        :both_count ->
          %{
            current
            | assignee_count: current.assignee_count + 1,
              reporter_count: current.reporter_count + 1
          }
      end

    new_total =
      case type do
        :both_count -> updated.assignee_count + updated.reporter_count - 1
        _ -> updated.assignee_count + updated.reporter_count
      end

    updated = %{updated | total: new_total}

    Map.put(acc, user_key, updated)
  end

  def get_top_users(user_stats, limit \\ 30) do
    user_stats
    |> Map.values()
    |> Enum.sort_by(& &1.total, :desc)
    |> Enum.take(limit)
  end
end
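
Note the :both_count branch above: when the same user is assignee and reporter, the issue contributes to both counters but only once to total. A minimal check with a hypothetical user:

demo_issue = %{
  "fields" => %{
    "assignee" => %{"key" => "u1", "displayName" => "User One"},
    "reporter" => %{"key" => "u1", "displayName" => "User One"}
  }
}

UserTaskStats.calculate_user_stats([demo_issue])
# => %{"u1" => %{name: "User One", assignee_count: 1, reporter_count: 1, total: 1}}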
Enum.count(all_issues_with_users) |> IO.puts
valid_all_issues_with_users = Enum.filter(all_issues_with_users, fn issue ->
  reporter = issue["fields"]["reporter"]
  assignee = issue["fields"]["assignee"]
  reporter || assignee
end)
user_stats = UserTaskStats.calculate_user_stats(valid_all_issues_with_users)
top_users = UserTaskStats.get_top_users(user_stats)
Vl.new(width: 500, title: "Top 30 users by number of issues")
|> Vl.data_from_values(top_users, only: ["total", "name"])
|> Vl.mark(:bar)
|> Vl.encode_field(:color, "total", type: :quantitative)
|> Vl.encode_field(:x, "total", type: :quantitative, title: "Number of issues")
|> Vl.encode_field(:y, "name", type: :nominal, title: "User",
                   sort: [field: "total", order: "descending"])

5. Time users spent working on issues

Build a histogram of the time users spent completing issues, based on logged work. The x-axis shows time; the y-axis shows the total number of issues matching that time. Only closed issues in the project are counted.
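
Each issue is expected to carry its logged work under fields.worklog.worklogs; the analyzer below assumes entries shaped roughly like this (illustrative values, trimmed):

%{
  "fields" => %{
    "worklog" => %{
      "worklogs" => [
        %{"author" => %{"displayName" => "Jane Doe"}, "timeSpentSeconds" => 3600}
      ]
    }
  }
}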

issues_with_worklogs_cache_path = Path.join(File.cwd!(), "issues_with_worklogs.json") 

issues_with_worklogs =
  if File.exists?(issues_with_worklogs_cache_path) do
    issues_with_worklogs_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_closed_issues_with_worklogs(jira_url, headers, kafka)
    File.write!(issues_with_worklogs_cache_path, JSON.encode!(result))
    result
  end
IO.puts("Загружено задач: #{Enum.count(issues_with_worklogs)}")
Enum.random(issues_with_worklogs)
issues_with_non_empty_worklogs = Enum.filter(issues_with_worklogs, fn issue ->
  Enum.count(issue["fields"]["worklog"]["worklogs"]) > 0
end)
IO.puts("#{Enum.count(issues_with_non_empty_worklogs)} issues with worklogs")
# Анализируем worklog для каждой задачи
defmodule WorklogAnalyzer do
  def calculate_user_time_per_issue(issues) do
    Enum.flat_map(issues, fn issue ->
      %{"worklogs" => worklogs} = issue["fields"]["worklog"]

      # Group logged time by user within this issue
      worklogs
      |> Enum.group_by(fn worklog ->
        worklog["author"]["displayName"] || worklog["author"]["name"]
      end)
      |> Enum.map(fn {user, logs} ->
        total_seconds =
          Enum.reduce(logs, 0, fn log, acc ->
            acc + (log["timeSpentSeconds"] || 0)
          end)

        %{
          issue_key: issue["key"],
          user: user,
          total_seconds: total_seconds,
          total_hours: total_seconds / 3600,
          worklogs_count: length(logs)
        }
      end)
    end)
  end

  def filter_by_min_time(worklog_data, min_seconds \\ 300) do
    Enum.filter(worklog_data, fn item ->
      item[:total_seconds] >= min_seconds
    end)
  end
end

# Collect per-user time data
worklog_data = WorklogAnalyzer.calculate_user_time_per_issue(issues_with_non_empty_worklogs)

IO.puts("Worklog records found: #{Enum.count(worklog_data)}")

# Drop records with very little logged time (under 5 minutes)
filtered_worklog_data = WorklogAnalyzer.filter_by_min_time(worklog_data, 300)

IO.puts("After filtering (≥5 minutes): #{Enum.count(filtered_worklog_data)}")

filtered_worklog_data
issues_per_total_hours =
  filtered_worklog_data
  |> Enum.map(& &1.total_hours)
  |> Enum.frequencies()

data_list = Enum.map(issues_per_total_hours, fn {key, value} -> %{hours: key, issues: value} end)
Vl.new(width: 250, title: "Time users spent on issues")
|> Vl.data_from_values(data_list, only: ["hours", "issues"])
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "hours", type: :quantitative, title: "Hours logged")
|> Vl.encode_field(:y, "issues", type: :quantitative, title: "Number of issues", scale: [type: :linear])

6. Issues by priority

Build a chart of the number of issues by severity.

# Peek at which fields are available on an issue
ran = Enum.random(all_issues)

ran["fields"] |> Map.keys()

all_issues_with_priority_cache_path = Path.join(File.cwd!(), "all_issues_with_priority.json") 

all_issues_with_priority =
  if File.exists?(all_issues_with_priority_cache_path) do
    all_issues_with_priority_cache_path |> File.read!() |> JSON.decode!()
  else
    result = JiraFetcher.fetch_all_issues_with_priorities(jira_url, headers, kafka)
    File.write!(all_issues_with_priority_cache_path, JSON.encode!(result))
    result
  end
IO.puts("Загружено задач с информацией о приоритете: #{Enum.count(all_issues_with_priority)}")
Enum.random(all_issues_with_priority)
defmodule PriorityAnalyzer do
  def calculate_priority_distribution(issues) do
    # Build per-priority statistics
    Enum.reduce(issues, %{}, fn issue, acc ->
      priority_data = issue["fields"]["priority"]

      if priority_data do
        priority_name = priority_data["name"] || "Unknown"
        priority_id = priority_data["id"] || "unknown"

        # Group by id, but display the name
        current = Map.get(acc, priority_id, %{
          id: priority_id,
          name: priority_name,
          count: 0
        })

        Map.put(acc, priority_id, %{current | count: current.count + 1})
      else
        # Issues without a priority
        current = Map.get(acc, "no_priority", %{
          id: "no_priority",
          name: "No Priority",
          count: 0
        })

        Map.put(acc, "no_priority", %{current | count: current.count + 1})
      end
    end)
  end

  def get_sorted_priorities(priority_stats) do
    # Sort by issue count, descending
    priority_stats
    |> Map.values()
    |> Enum.sort_by(& &1.count, :desc)
  end

  def create_chart_data(priority_stats) do
    get_sorted_priorities(priority_stats)
  end
end
sixth_chart_data =
  all_issues_with_priority
  |> PriorityAnalyzer.calculate_priority_distribution
  |> PriorityAnalyzer.create_chart_data
Vl.new(width: 500, title: "Issues by priority")
|> Vl.data_from_values(sixth_chart_data, only: ["name", "count", "id"])
|> Vl.mark(:bar)
|> Vl.encode_field(:x, "name", type: :nominal,
                   sort: [field: "id", order: "descending"], title: "Priority",
                   axis: [label_angle: 0])
|> Vl.encode_field(:y, "count", type: :quantitative, title: "Count")

Testing

ExUnit.start(autorun: false)

defmodule JiraAnalyticsTest do
  use ExUnit.Case, async: true
  
  describe "IssueParser" do
    test "get_datetime!/1 parses ISO8601 correctly" do
      issue = %{
        "fields" => %{
          "created" => "2023-01-01T10:00:00.000+0000"
        }
      }

      datetime = IssueParser.get_datetime!(issue, "created")

      assert %DateTime{
        year: 2023,
        month: 1,
        day: 1,
        hour: 10,
        minute: 0,
        second: 0
      } = datetime
    end
  end

  describe "DateRangeFilter" do
    test "filter_by_date_range/3 filters data correctly" do
      data = [
        %{date: "2023-01-01", value: 1},
        %{date: "2023-01-15", value: 2},
        %{date: "2023-02-01", value: 3}
      ]

      result = DateRangeFilter.filter_by_date_range(data, "2023-01-01", "2023-01-31")

      assert length(result) == 2
      assert Enum.all?(result, fn %{date: date} -> 
        date >= "2023-01-01" and date <= "2023-01-31"
      end)
    end

    test "filter_two_arrays/4 filters both arrays" do
      array1 = [
        %{date: "2023-01-01", value: 1},
        %{date: "2023-02-01", value: 2}
      ]
      
      array2 = [
        %{date: "2023-01-01", value: 3},
        %{date: "2023-02-01", value: 4}
      ]

      {filtered1, filtered2} = DateRangeFilter.filter_two_arrays(
        array1, array2, "2023-01-01", "2023-01-31"
      )

      assert length(filtered1) == 1
      assert length(filtered2) == 1
      assert Enum.at(filtered1, 0).date == "2023-01-01"
      assert Enum.at(filtered2, 0).date == "2023-01-01"
    end
  end
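
  # A small additional check for DailyTaskStats (a sketch using the module
  # defined above): two issues created on the same day, one closed the next
  # day; the cumulative columns must be running sums of the daily columns.
  describe "DailyTaskStats" do
    test "aggregate_daily_data/1 accumulates running totals" do
      stats = [
        %{created: "2023-01-01", closed: "2023-01-02"},
        %{created: "2023-01-01", closed: nil}
      ]

      [day1, day2] = DailyTaskStats.aggregate_daily_data(stats)

      assert day1.created_daily == 2
      assert day1.created_cumulative == 2
      assert day2.closed_daily == 1
      assert day2.closed_cumulative == 1
    end
  end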


  describe "StatusTimeCalculator" do
    test "total_time_in_status/2 calculates time for Open status" do
      data = %{
        "created" => ~U[2023-01-01 00:00:00Z],
        "changes" => [
          %{"created" => ~U[2023-01-01 02:00:00Z], "fromString" => "Open", "toString" => "In Progress"}
        ]
      }

      result = StatusTimeCalculator.total_time_in_status(data, "Open")
      
      assert result == 7200 # 2 hours in seconds
    end

    test "total_time_in_status/2 calculates time for other statuses" do
      data = %{
        "created" => ~U[2023-01-01 00:00:00Z],
        "changes" => [
          %{"created" => ~U[2023-01-01 01:00:00Z], "fromString" => "Open", "toString" => "In Progress"},
          %{"created" => ~U[2023-01-01 03:00:00Z], "fromString" => "In Progress", "toString" => "Resolved"},
          %{"created" => ~U[2023-01-01 04:00:00Z], "fromString" => "Resolved", "toString" => "In Progress"},
          %{"created" => ~U[2023-01-01 05:00:00Z], "fromString" => "In Progress", "toString" => "Closed"}
        ]
      }

      result = StatusTimeCalculator.total_time_in_status(data, "In Progress")
      
      # First period: 01:00 to 03:00 = 2 hours
      # Second period: 04:00 to 05:00 = 1 hour
      # Total: 3 hours = 10800 seconds
      assert result == 10800
    end

    test "find_time_spans/2 finds correct time spans" do
      transitions = [
        %{"created" => ~U[2023-01-01 01:00:00Z], "fromString" => "Open", "toString" => "In Progress"},
        %{"created" => ~U[2023-01-01 03:00:00Z], "fromString" => "In Progress", "toString" => "Resolved"},
        %{"created" => ~U[2023-01-01 04:00:00Z], "fromString" => "Resolved", "toString" => "In Progress"},
        %{"created" => ~U[2023-01-01 05:00:00Z], "fromString" => "In Progress", "toString" => "Closed"}
      ]

      result = StatusTimeCalculator.find_time_spans(transitions, "In Progress")
      
      assert result == [
        {~U[2023-01-01 04:00:00Z], ~U[2023-01-01 05:00:00Z]},
        {~U[2023-01-01 01:00:00Z], ~U[2023-01-01 03:00:00Z]}
      ]
    end
  end

  describe "WorklogAnalyzer" do
    test "calculate_user_time_per_issue/1 groups worklogs by user" do
      issues = [
        %{
          "key" => "TEST-1",
          "fields" => %{
            "worklog" => %{
              "worklogs" => [
                %{
                  "author" => %{"displayName" => "User1"},
                  "timeSpentSeconds" => 3600
                },
                %{
                  "author" => %{"displayName" => "User2"},
                  "timeSpentSeconds" => 1800
                },
                %{
                  "author" => %{"displayName" => "User1"},
                  "timeSpentSeconds" => 1800
                }
              ]
            }
          }
        }
      ]

      result = WorklogAnalyzer.calculate_user_time_per_issue(issues)

      assert [
        %{
          issue_key: "TEST-1",
          user: "User1",
          total_seconds: 5400,
          total_hours: 1.5,
          worklogs_count: 2
        },
        %{
          issue_key: "TEST-1",
          user: "User2",
          total_seconds: 1800,
          total_hours: 0.5,
          worklogs_count: 1
        }
      ] = result
    end

    test "filter_by_min_time/2 filters correctly" do
      worklog_data = [
        %{total_seconds: 300, total_hours: 0.083},
        %{total_seconds: 299, total_hours: 0.083},
        %{total_seconds: 600, total_hours: 0.167}
      ]

      result = WorklogAnalyzer.filter_by_min_time(worklog_data, 300)

      assert length(result) == 2
      refute Enum.any?(result, fn item -> item.total_seconds == 299 end)
    end
  end
end


ExUnit.run()