comparison hgext/releasenotes.py @ 33698:3748098d072a

releasenotes: add similarity check function to compare incoming notes. Incoming note fragments may have content similar to the existing release notes. In the case of a bug fix, we look for issueNNNN in the existing notes. For all other cases, the fuzzywuzzy library is used to compute a similarity score: if the score is above a certain threshold, the fragment is ignored; otherwise it is added. Because the score can be misleading for short commit messages, the similarity function is used only when the length of the string (in words) is above a certain value. The patch adds tests covering this usage. The way incoming notes are combined still needs improvement: interactive mode could be used for adding notes, perhaps when the similarity falls within a certain range.
author Rishabh Madan <rishabhmadan96@gmail.com>
date Sat, 05 Aug 2017 05:25:36 +0530
parents 9a944e908ecf
children 589fda7895da
comparison
equal deleted inserted replaced
33697:4d1e79945c2e 33698:3748098d072a
44 ('perf', _('Performance Improvements')), 44 ('perf', _('Performance Improvements')),
45 ('api', _('API Changes')), 45 ('api', _('API Changes')),
46 ] 46 ]
47 47
48 RE_DIRECTIVE = re.compile('^\.\. ([a-zA-Z0-9_]+)::\s*([^$]+)?$') 48 RE_DIRECTIVE = re.compile('^\.\. ([a-zA-Z0-9_]+)::\s*([^$]+)?$')
49 RE_ISSUE = r'\bissue ?[0-9]{4,6}(?![0-9])\b'
49 50
50 BULLET_SECTION = _('Other Changes') 51 BULLET_SECTION = _('Other Changes')
51 52
52 class parsedreleasenotes(object): 53 class parsedreleasenotes(object):
53 def __init__(self): 54 def __init__(self):
90 """Merge another instance into this one. 91 """Merge another instance into this one.
91 92
92 This is used to combine multiple sources of release notes together. 93 This is used to combine multiple sources of release notes together.
93 """ 94 """
94 for section in other: 95 for section in other:
96 existingnotes = converttitled(self.titledforsection(section)) + \
97 convertnontitled(self.nontitledforsection(section))
95 for title, paragraphs in other.titledforsection(section): 98 for title, paragraphs in other.titledforsection(section):
96 if self.hastitledinsection(section, title): 99 if self.hastitledinsection(section, title):
97 # TODO prompt for resolution if different and running in 100 # TODO prompt for resolution if different and running in
98 # interactive mode. 101 # interactive mode.
99 ui.write(_('%s already exists in %s section; ignoring\n') % 102 ui.write(_('%s already exists in %s section; ignoring\n') %
100 (title, section)) 103 (title, section))
101 continue 104 continue
102 105
103 # TODO perform similarity comparison and try to match against 106 incoming_str = converttitled([(title, paragraphs)])[0]
104 # existing. 107 if section == 'fix':
108 issue = getissuenum(incoming_str)
109 if issue:
110 if findissue(ui, existingnotes, issue):
111 continue
112
113 if similar(ui, existingnotes, incoming_str):
114 continue
115
105 self.addtitleditem(section, title, paragraphs) 116 self.addtitleditem(section, title, paragraphs)
106 117
107 for paragraphs in other.nontitledforsection(section): 118 for paragraphs in other.nontitledforsection(section):
108 if paragraphs in self.nontitledforsection(section): 119 if paragraphs in self.nontitledforsection(section):
109 continue 120 continue
110 121
111 # TODO perform similarily comparison and try to match against 122 incoming_str = convertnontitled([paragraphs])[0]
112 # existing. 123 if section == 'fix':
124 issue = getissuenum(incoming_str)
125 if issue:
126 if findissue(ui, existingnotes, issue):
127 continue
128
129 if similar(ui, existingnotes, incoming_str):
130 continue
131
113 self.addnontitleditem(section, paragraphs) 132 self.addnontitleditem(section, paragraphs)
114 133
115 class releasenotessections(object): 134 class releasenotessections(object):
116 def __init__(self, ui, repo=None): 135 def __init__(self, ui, repo=None):
117 if repo: 136 if repo:
134 if value == title: 153 if value == title:
135 return name 154 return name
136 155
137 return None 156 return None
138 157
def converttitled(titledparagraphs):
    """
    Flatten titled release-note entries into plain strings.

    ``titledparagraphs`` is an iterable of ``(title, paragraphs)`` pairs,
    where ``paragraphs`` is an iterable of paragraphs and each paragraph is
    an iterable of lines. One string is produced per pair: every line of
    every paragraph, in order, joined by single spaces. The title itself is
    not included in the resulting string.
    """
    return [' '.join([line for para in paragraphs for line in para])
            for _title, paragraphs in titledparagraphs]
169
def convertnontitled(nontitledparagraphs):
    """
    Flatten non-titled bullet entries into plain strings.

    ``nontitledparagraphs`` is an iterable of bullets; each bullet is an
    iterable of paragraphs and each paragraph is an iterable of lines. The
    result holds one string per bullet, built by joining all of that
    bullet's lines with single spaces.
    """
    def flatten(paragraphs):
        # Collapse the paragraph/line nesting of a single bullet.
        return ' '.join([line for para in paragraphs for line in para])

    return [flatten(paragraphs) for paragraphs in nontitledparagraphs]
181
def getissuenum(incoming_str):
    """
    Extract the issue reference from ``incoming_str``.

    The string is searched case-insensitively with ``RE_ISSUE``. The text of
    the first match is returned exactly as it appears in the input; ``None``
    is returned when the string contains no issue reference.
    """
    found = re.search(RE_ISSUE, incoming_str, re.IGNORECASE)
    if found is None:
        return None
    return found.group()
190
def findissue(ui, existing, issue):
    """
    Returns true if issue number already exists in notes.

    ``existing`` is an iterable of note strings and ``issue`` is an issue
    reference such as ``issue1234`` (as produced by ``getissuenum``).

    The comparison is made on the issue *number*: a note matches when it
    contains ``issue`` followed by an optional space and exactly that
    number, ignoring case. A plain substring test is not enough because
    ``issue1234`` is a prefix of ``issue12345`` and would be reported as
    already present, while ``Issue 1234`` would be missed.

    When a match is found, a message is written to ``ui`` and True is
    returned; otherwise False is returned and nothing is written.
    """
    number = re.search(r'[0-9]+', issue)
    if number:
        # The negative lookahead stops a shorter number from matching the
        # leading digits of a longer one.
        pattern = re.compile(r'\bissue ?%s(?![0-9])' % number.group(),
                             re.IGNORECASE)
        found = any(pattern.search(note) for note in existing)
    else:
        # No digits to anchor on: keep the literal containment check.
        found = any(issue in note for note in existing)

    if not found:
        return False

    ui.write(_('"%s" already exists in notes; ignoring\n') % issue)
    return True
200
def similar(ui, existing, incoming_str):
    """
    Report whether ``incoming_str`` duplicates one of the existing notes.

    Strings of ten words or fewer are never considered similar and are not
    scored at all. Longer strings are passed to ``similaritycheck``; when it
    says the fragment cannot be merged, a message is written to ``ui`` and
    True is returned. In every other case False is returned.
    """
    # Short fragments are skipped entirely.
    if len(incoming_str.split()) <= 10:
        return False

    if similaritycheck(incoming_str, existing):
        return False

    ui.write(_('"%s" already exists in notes file; ignoring\n')
             % incoming_str)
    return True
215
def similaritycheck(incoming_str, existingnotes):
    """
    Returns true when note fragment can be merged to existing notes.

    ``incoming_str`` is compared against every string in ``existingnotes``
    with ``fuzzywuzzy.fuzz.token_set_ratio``. The fragment can be merged
    (True) only if no existing note scores above 75; the first note scoring
    higher makes the result False.

    fuzzywuzzy is an optional third-party package. When it cannot be
    imported no scoring is possible, so the fragment is treated as not
    similar to anything and True is returned instead of aborting the merge
    with an ImportError.
    """
    try:
        import fuzzywuzzy.fuzz as fuzz
    except ImportError:
        # Optional dependency missing: skip the check and keep the fragment.
        return True

    return not any(fuzz.token_set_ratio(incoming_str, bullet) > 75
                   for bullet in existingnotes)
228
139 def getcustomadmonitions(repo): 229 def getcustomadmonitions(repo):
140 ctx = repo['.'] 230 ctx = repo['.']
141 p = config.config() 231 p = config.config()
142 232
143 def read(f, sections=None, remap=None): 233 def read(f, sections=None, remap=None):