forked from Qortal/Brooklyn
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
157 lines
4.7 KiB
157 lines
4.7 KiB
#!/usr/bin/env python2 |
|
# SPDX-License-Identifier: GPL-2.0+ |
|
# |
|
# Copyright (C) 2014, Masahiro Yamada <[email protected]> |
|
|
|
''' |
|
A tool to create/update the mailmap file |
|
|
|
The command 'git shortlog' summarizes git log output in a format suitable |
|
for inclusion in release announcements. Each commit will be grouped by |
|
author and title. |
|
|
|
One problem is that the authors' name and/or email address is sometimes |
|
spelled differently. The .mailmap feature can be used to coalesce together |
|
commits by the same person. |
|
(See 'man git-shortlog' for further information on this feature.) |
|
|
|
This tool helps to create/update the mailmap file. |
|
|
|
It runs 'git shortlog' internally and searches differently spelled author |
|
names which share the same email address. The author name with the most |
|
commits is assumed to be a canonical real name. If the number of commits |
|
from the canonical name is equal to or greater than 'MIN_COMMITS', |
|
the entry for the canonical name will be output. ('MIN_COMMITS' is used |
|
here because we do not want to create a fat mailmap by adding every author |
|
with only a few commits.) |
|
|
|
If there exists a mailmap file specified by the mailmap.file configuration |
|
option or '.mailmap' at the toplevel of the repository, it is used as |
|
a base file. (The mailmap.file configuration takes precedence over the |
|
'.mailmap' file if both exist.) |
|
|
|
The base file and the newly added entries are merged together and sorted |
|
alphabetically (but the comment block is kept untouched), and then printed |
|
to standard output. |
|
|
|
Usage |
|
----- |
|
|
|
scripts/mailmapper |
|
|
|
prints the mailmapping to standard output. |
|
|
|
scripts/mailmapper > tmp; mv tmp .mailmap |
|
|
|
will be useful for updating '.mailmap' file. |
|
''' |
|
|
|
import sys |
|
import os |
|
import subprocess |
|
|
|
# Threshold on the canonical name's commit count for emitting a mailmap
# entry; it keeps the generated mailmap file from growing too large.
MIN_COMMITS = 50

# Ask git for the top-level directory of the repository.  The command output
# ends with a newline, which is stripped.  On success, make that directory
# the current working directory so that later relative paths (such as
# '.mailmap') are resolved from the top of the tree.
try:
    toplevel = subprocess.check_output(
        ['git', 'rev-parse', '--show-toplevel']).rstrip()
except subprocess.CalledProcessError:
    sys.exit('Please run in a git repository.')
else:
    os.chdir(toplevel)
|
|
|
# First, build the 'author name' -> 'number of commits' table.
# The name with the most commits is assumed to be the canonical real name.
shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n'])

commits_per_name = {}

for line in shortlog.splitlines():
    # Each line is split into a leading commit count and the author name.
    try:
        commits, name = line.split(None, 1)
    except ValueError:
        # The line has no author name.  Skip it: falling through would
        # re-use the values left over from the previous iteration, or raise
        # NameError if this happens on the very first line.
        continue
    commits_per_name[name] = int(commits)
|
|
|
# Next, coalesce the author names that share the same email address.
shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', '-e'])

# mail_vs_name: '<email>' -> name with the most commits seen so far
# output:       '<email>' -> canonical name to be emitted as a new entry
mail_vs_name = {}
output = {}

for line in shortlog.splitlines():
    # line.rsplit(None, 1) is not safe because weird email addresses might
    # include whitespace.  Cut at the first '<' instead; partition() never
    # raises, even if the line holds several '<' characters or none at all.
    tmp, bracket, mail = line.partition('<')
    if not bracket:
        # No email part on this line; there is nothing to coalesce on.
        continue
    mail = '<' + mail.rstrip()

    try:
        _, name = tmp.rstrip().split(None, 1)
    except ValueError:
        # author name is empty
        name = ''

    if mail not in mail_vs_name:
        # First name seen for this address.
        mail_vs_name[mail] = name
        continue

    # Another name for the same email address: keep the one with more
    # commits.  Unknown or empty names count as zero commits; on a tie the
    # name from the current line wins.
    prev_name = mail_vs_name[mail]
    if commits_per_name.get(name, 0) >= commits_per_name.get(prev_name, 0):
        major_name = name
    else:
        major_name = prev_name
    mail_vs_name[mail] = major_name

    # Emit an entry only for names with MIN_COMMITS or more commits.
    if commits_per_name.get(major_name, 0) >= MIN_COMMITS:
        output[mail] = major_name
|
|
|
# Pick the base mailmap file, in order of preference:
# [1] If there exists a mailmap file at the location pointed to
#     by the mailmap.file configuration option, update it.
# [2] If the file .mailmap exists at the toplevel of the repository, update it.
# [3] Otherwise, create a new mailmap file (infile stays None).
mailmap_files = []

try:
    config_mailmap = subprocess.check_output(['git', 'config', 'mailmap.file'])
except subprocess.CalledProcessError:
    # 'git config' exited with a non-zero status (e.g. the option is not
    # set); carry on without a configured mailmap file.
    config_mailmap = ''

# strip the trailing newline of the command output
config_mailmap = config_mailmap.rstrip()
if config_mailmap:
    mailmap_files.append(config_mailmap)

mailmap_files.append('.mailmap')

infile = None

for map_file in mailmap_files:
    try:
        infile = open(map_file)
    except (IOError, OSError):
        # Failed to open.  Try next.  Only I/O errors are caught so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        continue
    break
|
|
|
# comment_block: the leading run of comment/blank lines of the base file,
#                printed first and left unsorted
# output_lines:  every remaining line of the base file plus the new entries,
#                printed sorted
comment_block = []
output_lines = []

if infile:
    # 'with' closes the base file even if reading it fails part-way.
    with infile:
        in_header = True
        for line in infile:
            # The last line of a file may lack its newline; add it so the
            # line is not glued to its neighbour after sorting.
            if not line.endswith('\n'):
                line += '\n'
            if in_header and line.startswith(('#', '\n')):
                comment_block.append(line)
                continue
            # The first non-comment, non-blank line ends the comment block;
            # everything from here on goes to the sorted part.
            in_header = False
            output_lines.append(line)

# Add the newly found entries, skipping any that the base file already
# contains verbatim so that no duplicate lines are produced.
existing = set(output_lines)
for mail, name in output.items():
    entry = name + ' ' + mail + '\n'
    if entry not in existing:
        output_lines.append(entry)

output_lines.sort()

sys.stdout.write(''.join(comment_block + output_lines))
|
|
|