Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions backend/plugins/github/e2e/account_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@ package e2e
import (
"testing"

"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/models/domainlayer/crossdomain"
"github.com/apache/incubator-devlake/core/models/domainlayer/didgen"
"github.com/apache/incubator-devlake/helpers/e2ehelper"
"github.com/apache/incubator-devlake/plugins/github/impl"
"github.com/apache/incubator-devlake/plugins/github/models"
"github.com/apache/incubator-devlake/plugins/github/tasks"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestAccountDataFlow(t *testing.T) {
Expand Down Expand Up @@ -108,4 +112,33 @@ func TestAccountDataFlow(t *testing.T) {
"_raw_data_remark",
},
)

// Referential-integrity invariant (#8886): every account the repo references in
// _tool_github_repo_accounts must have a domain `accounts` row, so issues.creator_id /
// pull_requests.author_id / merged_by_id never point at a missing account. We generate
// the domain id with the SAME generator the issue/PR convertors use, so this is a
// faithful proxy for the FK join the issue reported as broken. It also fails loudly if a
// future change shrinks ConvertAccounts' coverage or diverges the id generation.
accountIdGen := didgen.NewDomainIdGenerator(&models.GithubAccount{})
var repoAccounts []models.GithubRepoAccount
require.NoError(t, dataflowTester.Dal.All(&repoAccounts,
dal.Where("repo_github_id = ? AND connection_id = ? AND account_id > 0",
taskData.Options.GithubId, taskData.Options.ConnectionId),
))
require.NotEmpty(t, repoAccounts, "fixture must reference at least one account")
sawOrphanCase := false
for _, ra := range repoAccounts {
if ra.Login == "milichev" {
sawOrphanCase = true // the non-committer author from the issue repro
}
domainId := accountIdGen.Generate(taskData.Options.ConnectionId, ra.AccountId)
count, err := dataflowTester.Dal.Count(
dal.From(&crossdomain.Account{}),
dal.Where("id = ?", domainId),
)
require.NoError(t, err)
assert.Equalf(t, int64(1), count,
"orphan FK: repo account %q (id=%d) has no domain accounts row %q", ra.Login, ra.AccountId, domainId)
}
assert.True(t, sawOrphanCase, "fixture should include the non-committer orphan case (milichev)")
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ connection_id,account_id,repo_github_id,login
1,3971390,134018330,ppmoon
1,7496278,134018330,panjf2000
1,8518239,134018330,gitter-badger
1,145564,134018330,milichev
1,11763614,2,Moonlight-Zhao
1,12420699,2,shanghai-Jerry
1,14950473,2,zqkgo
Expand Down
15 changes: 8 additions & 7 deletions backend/plugins/github/e2e/snapshot_tables/account.csv
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
id,email,full_name,user_name,avatar_url,organization,_raw_data_params,_raw_data_table,_raw_data_id,_raw_data_remark
github:GithubAccount:1:1052632,runner.mei@,runner,runner-mei,https://avatars.githubusercontent.com/u/1052632?v=4,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_accounts,13,
github:GithubAccount:1:21979,appleboy.tw@gmail.com,Bo-Yi Wu,appleboy,https://avatars.githubusercontent.com/u/21979?v=4,"COSCUP,nodejs-tw,moztw,h5bp,CodeIgniter-TW,drone,Getmore,golangtw,laravel-taiwan,go-xorm,gin-gonic,PHPConf-TW,Mediatek-Cloud,SJFinder,go-gitea,laradock,gin-contrib,tagfans,maintainers,go-training,go-ggz,the-benchmarker,golang-queue","{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_accounts,8,
github:GithubAccount:1:3794113,shanhu5739@gmail.com,Derek,shanhuhai5739,https://avatars.githubusercontent.com/u/3794113?v=4,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_accounts,2,
github:GithubAccount:1:3971390,cnliuyunpeng@gmail.com,ppmoon,ppmoon,https://avatars.githubusercontent.com/u/3971390?v=4,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_accounts,14,
github:GithubAccount:1:7496278,i@andypan.me,Andy Pan,panjf2000,https://avatars.githubusercontent.com/u/7496278?v=4,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_accounts,5,
github:GithubAccount:1:8518239,badger@gitter.im,The Gitter Badger,gitter-badger,https://avatars.githubusercontent.com/u/8518239?v=4,,"{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_accounts,9,
github:GithubAccount:1:964542,sarath.sp06@gmail.com,Sarath Sadasivan Pillai,sarathsp06,https://avatars.githubusercontent.com/u/964542?v=4,"exotel,leadmrktr,shellagilehub,odysseyhack,boodltech","{""ConnectionId"":1,""Name"":""panjf2000/ants""}",_raw_github_api_accounts,1,
github:GithubAccount:1:1052632,runner.mei@,runner,runner-mei,https://avatars.githubusercontent.com/u/1052632?v=4,,,,0,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_raw_data fields are missing, please fix it. Thanks.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, will do when I'm back on my computer

github:GithubAccount:1:145564,,,milichev,,,,,0,
github:GithubAccount:1:21979,appleboy.tw@gmail.com,Bo-Yi Wu,appleboy,https://avatars.githubusercontent.com/u/21979?v=4,"COSCUP,nodejs-tw,moztw,h5bp,CodeIgniter-TW,drone,Getmore,golangtw,laravel-taiwan,go-xorm,gin-gonic,PHPConf-TW,Mediatek-Cloud,SJFinder,go-gitea,laradock,gin-contrib,tagfans,maintainers,go-training,go-ggz,the-benchmarker,golang-queue",,,0,
github:GithubAccount:1:3794113,shanhu5739@gmail.com,Derek,shanhuhai5739,https://avatars.githubusercontent.com/u/3794113?v=4,,,,0,
github:GithubAccount:1:3971390,cnliuyunpeng@gmail.com,ppmoon,ppmoon,https://avatars.githubusercontent.com/u/3971390?v=4,,,,0,
github:GithubAccount:1:7496278,i@andypan.me,Andy Pan,panjf2000,https://avatars.githubusercontent.com/u/7496278?v=4,,,,0,
github:GithubAccount:1:8518239,badger@gitter.im,The Gitter Badger,gitter-badger,https://avatars.githubusercontent.com/u/8518239?v=4,,,,0,
github:GithubAccount:1:964542,sarath.sp06@gmail.com,Sarath Sadasivan Pillai,sarathsp06,https://avatars.githubusercontent.com/u/964542?v=4,"exotel,leadmrktr,shellagilehub,odysseyhack,boodltech",,,0,
64 changes: 50 additions & 14 deletions backend/plugins/github/tasks/account_convertor.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/apache/incubator-devlake/core/dal"
"github.com/apache/incubator-devlake/core/errors"
"github.com/apache/incubator-devlake/core/models/common"
"github.com/apache/incubator-devlake/core/models/domainlayer"
"github.com/apache/incubator-devlake/core/models/domainlayer/crossdomain"
"github.com/apache/incubator-devlake/core/models/domainlayer/didgen"
Expand All @@ -30,6 +31,19 @@ import (
"github.com/apache/incubator-devlake/plugins/github/models"
)

// repoAccountForConvert is the row projected by ConvertAccounts' query: every
// account referenced by the repo (from _tool_github_repo_accounts), enriched
// with profile detail from _tool_github_accounts when it was collected. The
// embedded NoPKModel carries the RawDataOrigin across to the domain row.
type repoAccountForConvert struct {
Id int
Login string
Name string
Email string
AvatarUrl string
common.NoPKModel
}

func init() {
RegisterSubtaskMeta(&ConvertAccountsMeta)
}
Expand All @@ -38,12 +52,12 @@ var ConvertAccountsMeta = plugin.SubTaskMeta{
Name: "Convert Users",
EntryPoint: ConvertAccounts,
EnabledByDefault: true,
Description: "Convert tool layer table github_accounts into domain layer table accounts",
Description: "Convert every account referenced by the repo (tool layer repo_accounts, enriched by github_accounts) into domain layer table accounts",
DomainTypes: []string{plugin.DOMAIN_TYPE_CROSS},
DependencyTables: []string{
models.GithubAccount{}.TableName(), // cursor
models.GithubRepoAccount{}.TableName(), // cursor
models.GithubAccountOrg{}.TableName()}, // account id gen
models.GithubRepoAccount{}.TableName(), // cursor (every user referenced by the repo)
models.GithubAccount{}.TableName(), // left-join enrichment (profile detail, optional)
models.GithubAccountOrg{}.TableName()}, // org pluck
ProductTables: []string{crossdomain.Account{}.TableName()},
}

Expand All @@ -53,7 +67,7 @@ func ConvertAccounts(taskCtx plugin.SubTaskContext) errors.Error {

accountIdGen := didgen.NewDomainIdGenerator(&models.GithubAccount{})

converter, err := api.NewStatefulDataConverter(&api.StatefulDataConverterArgs[models.GithubAccount]{
converter, err := api.NewStatefulDataConverter(&api.StatefulDataConverterArgs[repoAccountForConvert]{
SubtaskCommonArgs: &api.SubtaskCommonArgs{
SubTaskContext: taskCtx,
Table: RAW_ACCOUNT_TABLE,
Expand All @@ -62,29 +76,51 @@ func ConvertAccounts(taskCtx plugin.SubTaskContext) errors.Error {
Name: data.Options.Name,
},
},
// Source every account referenced by this repo from _tool_github_repo_accounts
// (which the issue/PR/commit extractors populate for any author, assignee, or
// merged-by user), and LEFT JOIN _tool_github_accounts for profile detail when it
// was collected. This guarantees a domain `accounts` row for every CreatorId /
// AuthorId the other convertors emit, instead of only for users who committed.
// SQL is kept DB-agnostic (no backtick quoting, COALESCE not IFNULL) so it runs on
// both MySQL and PostgreSQL.
Input: func(stateManager *api.SubtaskStateManager) (dal.Rows, errors.Error) {
clauses := []dal.Clause{
dal.Select("_tool_github_accounts.*"),
dal.From(&models.GithubAccount{}),
dal.Select(`_tool_github_repo_accounts.account_id AS id,
_tool_github_repo_accounts.login AS login,
COALESCE(ga.name, '') AS name,
COALESCE(ga.email, '') AS email,
COALESCE(ga.avatar_url, '') AS avatar_url,
_tool_github_repo_accounts._raw_data_params AS _raw_data_params,
_tool_github_repo_accounts._raw_data_table AS _raw_data_table,
_tool_github_repo_accounts._raw_data_id AS _raw_data_id,
_tool_github_repo_accounts._raw_data_remark AS _raw_data_remark`),
dal.From(&models.GithubRepoAccount{}),
dal.Join(`left join _tool_github_accounts ga on (
ga.connection_id = _tool_github_repo_accounts.connection_id
AND ga.id = _tool_github_repo_accounts.account_id
)`),
dal.Where(
"repo_github_id = ? and _tool_github_accounts.connection_id=?",
`_tool_github_repo_accounts.repo_github_id = ?
AND _tool_github_repo_accounts.connection_id = ?
AND _tool_github_repo_accounts.account_id > 0`,
data.Options.GithubId,
data.Options.ConnectionId,
),
dal.Join(`left join _tool_github_repo_accounts gra on (
_tool_github_accounts.connection_id = gra.connection_id
AND _tool_github_accounts.id = gra.account_id
)`),
}
if stateManager.IsIncremental() {
since := stateManager.GetSince()
if since != nil {
clauses = append(clauses, dal.Where("_tool_github_accounts.updated_at >= ?", since))
// Incremental cursor intentionally tracks _tool_github_repo_accounts.updated_at
// (repo membership), not _tool_github_accounts.updated_at (profile freshness):
// account-detail re-enrichment is reconciled on the next full sync. Do not switch
// this back to _tool_github_accounts — that is what left issue/PR-only authors
// orphaned (#8886).
clauses = append(clauses, dal.Where("_tool_github_repo_accounts.updated_at >= ?", since))
}
}
return db.Cursor(clauses...)
},
Convert: func(githubUser *models.GithubAccount) ([]interface{}, errors.Error) {
Convert: func(githubUser *repoAccountForConvert) ([]interface{}, errors.Error) {
// query related orgs
var orgs []string
err := db.Pluck(`org_login`, &orgs,
Expand Down
10 changes: 10 additions & 0 deletions backend/plugins/github/tasks/pr_extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,16 @@ func ExtractApiPullRequests(taskCtx plugin.SubTaskContext) errors.Error {
githubPr.AuthorName = githubUser.Login
githubPr.AuthorId = githubUser.AccountId
}
// Emit a repo_account for the merged-by user too, so pull_requests.merged_by_id
// resolves to a domain account instead of an orphan FK (ConvertAccounts sources
// every referenced user from _tool_github_repo_accounts).
if body.MergedBy != nil {
mergedByUser, err := convertAccount(body.MergedBy, data.Options.GithubId, data.Options.ConnectionId)
if err != nil {
return nil, err
}
results = append(results, mergedByUser)
}
for _, label := range body.Labels {
results = append(results, &models.GithubPrLabel{
ConnectionId: data.Options.ConnectionId,
Expand Down
Loading