Mercurial > hg
comparison hgext/releasenotes.py @ 33698:3748098d072a
releasenotes: add similarity check function to compare incoming notes
It is possible that incoming note fragments have content similar to the
existing release notes. In the case of a bug fix, we look for a matching issueNNNN
in the existing notes. For other general cases, we use the fuzzywuzzy library to get
a similarity score. If the score is above a certain threshold, we ignore the
fragment; otherwise, we add it. But the score might be misleading for short commit
messages, so the similarity function is used only if the length of the string (in words)
is above a certain value. This patch adds tests related to its usage. It still needs
improvement with respect to combining incoming notes. We could use interactive mode
for adding notes. Maybe we can do this if the similarity falls within a certain range.
author | Rishabh Madan <rishabhmadan96@gmail.com> |
---|---|
date | Sat, 05 Aug 2017 05:25:36 +0530 |
parents | 9a944e908ecf |
children | 589fda7895da |
comparison
equal
deleted
inserted
replaced
33697:4d1e79945c2e | 33698:3748098d072a |
---|---|
44 ('perf', _('Performance Improvements')), | 44 ('perf', _('Performance Improvements')), |
45 ('api', _('API Changes')), | 45 ('api', _('API Changes')), |
46 ] | 46 ] |
47 | 47 |
48 RE_DIRECTIVE = re.compile('^\.\. ([a-zA-Z0-9_]+)::\s*([^$]+)?$') | 48 RE_DIRECTIVE = re.compile('^\.\. ([a-zA-Z0-9_]+)::\s*([^$]+)?$') |
49 RE_ISSUE = r'\bissue ?[0-9]{4,6}(?![0-9])\b' | |
49 | 50 |
50 BULLET_SECTION = _('Other Changes') | 51 BULLET_SECTION = _('Other Changes') |
51 | 52 |
52 class parsedreleasenotes(object): | 53 class parsedreleasenotes(object): |
53 def __init__(self): | 54 def __init__(self): |
90 """Merge another instance into this one. | 91 """Merge another instance into this one. |
91 | 92 |
92 This is used to combine multiple sources of release notes together. | 93 This is used to combine multiple sources of release notes together. |
93 """ | 94 """ |
94 for section in other: | 95 for section in other: |
96 existingnotes = converttitled(self.titledforsection(section)) + \ | |
97 convertnontitled(self.nontitledforsection(section)) | |
95 for title, paragraphs in other.titledforsection(section): | 98 for title, paragraphs in other.titledforsection(section): |
96 if self.hastitledinsection(section, title): | 99 if self.hastitledinsection(section, title): |
97 # TODO prompt for resolution if different and running in | 100 # TODO prompt for resolution if different and running in |
98 # interactive mode. | 101 # interactive mode. |
99 ui.write(_('%s already exists in %s section; ignoring\n') % | 102 ui.write(_('%s already exists in %s section; ignoring\n') % |
100 (title, section)) | 103 (title, section)) |
101 continue | 104 continue |
102 | 105 |
103 # TODO perform similarity comparison and try to match against | 106 incoming_str = converttitled([(title, paragraphs)])[0] |
104 # existing. | 107 if section == 'fix': |
108 issue = getissuenum(incoming_str) | |
109 if issue: | |
110 if findissue(ui, existingnotes, issue): | |
111 continue | |
112 | |
113 if similar(ui, existingnotes, incoming_str): | |
114 continue | |
115 | |
105 self.addtitleditem(section, title, paragraphs) | 116 self.addtitleditem(section, title, paragraphs) |
106 | 117 |
107 for paragraphs in other.nontitledforsection(section): | 118 for paragraphs in other.nontitledforsection(section): |
108 if paragraphs in self.nontitledforsection(section): | 119 if paragraphs in self.nontitledforsection(section): |
109 continue | 120 continue |
110 | 121 |
111 # TODO perform similarily comparison and try to match against | 122 incoming_str = convertnontitled([paragraphs])[0] |
112 # existing. | 123 if section == 'fix': |
124 issue = getissuenum(incoming_str) | |
125 if issue: | |
126 if findissue(ui, existingnotes, issue): | |
127 continue | |
128 | |
129 if similar(ui, existingnotes, incoming_str): | |
130 continue | |
131 | |
113 self.addnontitleditem(section, paragraphs) | 132 self.addnontitleditem(section, paragraphs) |
114 | 133 |
115 class releasenotessections(object): | 134 class releasenotessections(object): |
116 def __init__(self, ui, repo=None): | 135 def __init__(self, ui, repo=None): |
117 if repo: | 136 if repo: |
134 if value == title: | 153 if value == title: |
135 return name | 154 return name |
136 | 155 |
137 return None | 156 return None |
138 | 157 |
def converttitled(titledparagraphs):
    """
    Flatten titled paragraphs into one string per titled entry

    Each entry is a ``(title, paragraphs)`` pair. The title is dropped and
    every line of every paragraph is joined with a single space.
    """
    return [' '.join(line for para in paragraphs for line in para)
            for _title, paragraphs in titledparagraphs]
169 | |
def convertnontitled(nontitledparagraphs):
    """
    Flatten non-titled bullets into one string per bullet

    Each bullet is a list of paragraphs, and each paragraph is a list of
    lines. All lines of a bullet are joined with a single space.
    """
    return [' '.join(line for para in paragraphs for line in para)
            for paragraphs in nontitledparagraphs]
181 | |
def getissuenum(incoming_str):
    """
    Return the first issue reference found in the incoming string

    The string is searched case-insensitively using RE_ISSUE. The matched
    text is returned unchanged, or None when nothing matches.
    """
    found = re.search(RE_ISSUE, incoming_str, re.IGNORECASE)
    return found.group() if found else None
190 | |
def findissue(ui, existing, issue):
    """
    Returns true if issue number already exists in notes.

    ``existing`` is an iterable of note strings and ``issue`` is the issue
    reference to look for (e.g. ``issue1234``). The reference is matched
    case-insensitively and as a complete token, so ``issue1234`` is not
    reported as present merely because ``issue12345`` is. When a match is
    found, a message is written through ``ui.write`` before returning.
    """
    # A plain substring test would treat a longer issue number that merely
    # starts with the same digits as a duplicate, so guard both ends of the
    # reference. Matching ignores case because the incoming reference is
    # also extracted case-insensitively.
    pattern = re.compile(r'(?<!\w)' + re.escape(issue) + r'(?![0-9])',
                         re.IGNORECASE)
    if any(pattern.search(s) for s in existing):
        ui.write(_('"%s" already exists in notes; ignoring\n') % issue)
        return True
    return False
200 | |
def similar(ui, existing, incoming_str):
    """
    Report whether a similar note is already present in existing notes

    Strings of ten words or fewer are never considered similar. For longer
    strings the decision is delegated to similaritycheck(); when it says the
    fragment cannot be merged, a message is written through ``ui.write`` and
    True is returned.
    """
    # Short strings are skipped before any similarity scoring is attempted.
    if len(incoming_str.split()) <= 10:
        return False

    if similaritycheck(incoming_str, existing):
        return False

    ui.write(_('"%s" already exists in notes file; ignoring\n')
             % incoming_str)
    return True
215 | |
def similaritycheck(incoming_str, existingnotes):
    """
    Returns true when note fragment can be merged to existing notes.

    Each string in ``existingnotes`` is compared with ``incoming_str`` using
    ``fuzzywuzzy.fuzz.token_set_ratio``. The fragment is rejected (False is
    returned) as soon as one comparison scores above 75.

    fuzzywuzzy is an optional third-party library. When it cannot be
    imported, no score can be computed, so the fragment is treated as
    mergeable instead of aborting with ImportError.
    """
    try:
        import fuzzywuzzy.fuzz as fuzz
    except ImportError:
        # Without the library there is nothing to score against; fall back
        # to accepting the fragment rather than failing the whole merge.
        return True

    # any() stops at the first bullet that is too similar.
    return not any(fuzz.token_set_ratio(incoming_str, bullet) > 75
                   for bullet in existingnotes)
228 | |
139 def getcustomadmonitions(repo): | 229 def getcustomadmonitions(repo): |
140 ctx = repo['.'] | 230 ctx = repo['.'] |
141 p = config.config() | 231 p = config.config() |
142 | 232 |
143 def read(f, sections=None, remap=None): | 233 def read(f, sections=None, remap=None): |