summary refs log tree commit diff
path: root/python/problems/dictionaries/text
diff options
context:
space:
mode:
Diffstat (limited to 'python/problems/dictionaries/text')
-rw-r--r-- python/problems/dictionaries/text/common.py 86
-rw-r--r-- python/problems/dictionaries/text/en.py 13
-rw-r--r-- python/problems/dictionaries/text/sl.py 45
-rw-r--r-- python/problems/dictionaries/text/tmp.py 29
4 files changed, 173 insertions, 0 deletions
diff --git a/python/problems/dictionaries/text/common.py b/python/problems/dictionaries/text/common.py
new file mode 100644
index 0000000..134f69b
--- /dev/null
+++ b/python/problems/dictionaries/text/common.py
@@ -0,0 +1,86 @@
+import re
+from python.util import has_token_sequence, string_almost_equal, \
+ string_contains_number, get_tokens, get_numbers, get_exception_desc
+from server.hints import Hint
+
+id = 20612  # problem id; en.py and sl.py in this commit declare the same value
+number = 13  # NOTE(review): presumably the problem's position within its group — confirm
+visible = True  # NOTE(review): presumably controls whether the problem is shown — confirm
+
+solution = '''\
+import collections
+
+def following_words(txt):
+ words = txt.split()
+ freq = collections.defaultdict(list)
+ for word, next_word in zip(words, words[1:]):
+ freq[word].append(next_word)
+ return freq
+
+def freq_following_word(txt):
+ following = following_words(txt)
+ for f in following:
+ vals = collections.Counter(following[f])
+ s = sorted(vals.most_common(), key = lambda x: (-x[1], x[0]))
+ following[f] = s[0][0]
+ return following
+
+def text(word, full_text, num):
+ freq = freq_following_word(full_text)
+ words = []
+ for i in range(num):
+ words.append(word)
+ word = freq[word]
+ return ' '.join(words)
+'''  # end of the reference solution source; NOTE(review): its text() indexes freq[word] without checking that the word has a follower
+
+hint_type = {
+ 'final_hint': Hint('final_hint')
+}  # maps the 'final_hint' id to a server.hints.Hint
+
+def test(python, code):  # grade `code` against the fixed cases below; returns (passed, hints)
+ func_name = 'text'
+ tokens = get_tokens(code)
+ if not has_token_sequence(tokens, ['def', func_name]):  # the submission never defines `text`: report it and stop
+ return False, [{'id' : 'no_func_name', 'args' : {'func_name' : func_name}}]
+
+ in_out = [  # ((word, full_text, num), expected return value) pairs
+ (('in', 'in in in in', 5), 'in in in in in'),
+ (('in', 'in to in ono in to smo mi', 5), 'in to in to in'),
+ (('danes', 'danes je lep dan danes sije sonce', 5),
+ 'danes je lep dan danes'),
+ (('danes', 'danes je lep dan danes sije sonce danes sije dan ki je sonce', 5),
+ 'danes sije dan danes sije'),
+ ]
+
+ test_in = [(func_name+'%s'%str(l[0]), None) for l in in_out]  # call expressions as strings, e.g. "text('in', 'in in in in', 5)", each paired with None
+ test_out = [l[1] for l in in_out]
+
+ answers = python(code=code, inputs=test_in, timeout=1.0)  # NOTE(review): `python` is supplied by the caller; presumably it evaluates each input against `code` — confirm
+ n_correct = 0
+ tin, tout = None, None
+ for i, (ans, to) in enumerate(zip(answers, test_out)):
+ corr = ans[0] == to  # element 0 of each answer is compared with the expected string
+ n_correct += corr  # bool added as 0/1
+ if not corr:
+ tin = test_in[i][0]  # overwritten on every failure, so only the last failing case is kept
+ tout = to
+
+ passed = n_correct == len(test_in)
+ hints = [{'id': 'test_results', 'args': {'passed': n_correct, 'total': len(test_in)}}]
+ if tin:  # at least one case failed: include the last failing call and its expected output
+ hints.append({'id': 'problematic_test_case', 'args': {'testin': str(tin), 'testout': str(tout)}})
+ if passed:
+ hints.append({'id': 'final_hint'})
+ return passed, hints
+
+
+def hint(python, code):  # returns an exception description for `code` when one is found, otherwise None
+ tokens = get_tokens(code)  # NOTE(review): `tokens` is not used again in this function
+
+ # run one test first to see if there are any exceptions
+ answer = python(code=code, inputs=[(None, None)], timeout=1.0)
+ exc = get_exception_desc(answer[0][3])  # NOTE(review): element 3 of the first answer is presumably the captured error output — confirm
+ if exc: return exc
+
+ return None
diff --git a/python/problems/dictionaries/text/en.py b/python/problems/dictionaries/text/en.py
new file mode 100644
index 0000000..4e5f5b3
--- /dev/null
+++ b/python/problems/dictionaries/text/en.py
@@ -0,0 +1,13 @@
+id = 20612  # same problem id as in common.py and sl.py
+name = 'Generated text'
+
+description = '''\
+<p>(translation missing)</p>'''  # placeholder: no English statement text is provided here
+
+hint = {
+ 'plan': '''\
+<p>(translation missing)</p>''',
+
+ 'no_input_call': '''\
+<p>(translation missing)</p>''',
+}
\ No newline at end of file
diff --git a/python/problems/dictionaries/text/sl.py b/python/problems/dictionaries/text/sl.py
new file mode 100644
index 0000000..3afeec4
--- /dev/null
+++ b/python/problems/dictionaries/text/sl.py
@@ -0,0 +1,45 @@
+import server
+mod = server.problems.load_language('python', 'sl')  # NOTE(review): `mod` is not referenced again in the visible lines of this file — confirm it is needed
+
+
+id = 20612  # same problem id as in common.py and en.py
+name = 'Generirano besedilo'  # Slovenian title ("Generated text")
+
+description = '''\
+<p>
+Napisati želimo program, ki bo generiral tipičen stavek. Seveda ni dobro,
+da si samo naključno izbiramo besede in jih lepimo skupaj, saj bi tako dobili
+nekaj povsem neberljivega. Naloge se bomo lotili malo pametneje.
+Recimo, da ima program na voljo nek tekst, npr. <code>'in to in ono smo mi'</code>,
+iz katerega se lahko uči. Naš tekst bomo začeli z izbrano besedo.
+Nadaljujemo tako, da se vprašamo katera beseda se v učnem tekstu pojavi
+najpogosteje za izbrano besedo. Če začnemo z besedo <code>to</code>, potem
+bo naslednja beseda <code>in</code>. Postopek nato ponovimo z besedo <code>in</code>.
+</p>
+
+<p>
+Napišite funkcijo <code>text(word, full_text, num)</code>, ki sprejme začetno
+besedo <code>word</code>, celotno besedilo <code>full_text</code>,
+ter generira besedilo dolgo <code>num</code> besed.
+</p>
+
+<p> Da bodo generirani stavki bolj zanimivi, lahko program testiraš na
+kakšnem romanu, npr. Orwellovi noveli 1984. Vendar pa tega ne boš mogel
+izvajati v CodeQ, saj nima dostopa do mreže. Poženi iz kakšnega drugega programa,
+npr. iz pyCharma ali kar iz ukazne vrstice.
+<pre>
+>>> import urllib.request
+>>> txt = urllib.request.urlopen('http://squeeb1134.tripod.com/1984.txt').read().decode('utf8')
+>>> text('Big', txt, 15)
+'Big Brother is not be a few minutes at the Party member of the Party'
+</pre>
+'''
+
+plan = []  # empty: no entries are defined under `plan` for this problem
+
+hint = {
+ 'final_hint': ['''\
+<p>Program je pravilen! <br>
+</p>
+'''],  # text for the 'final_hint' id that common.py's test() appends when every case passes; it says the program is correct
+}
diff --git a/python/problems/dictionaries/text/tmp.py b/python/problems/dictionaries/text/tmp.py
new file mode 100644
index 0000000..bf56c4f
--- /dev/null
+++ b/python/problems/dictionaries/text/tmp.py
@@ -0,0 +1,29 @@
+import collections
+
+def following_words(txt):  # map each word of txt to the list of words that directly follow it
+ words = txt.split()  # whitespace-separated tokens
+ freq = collections.defaultdict(list)
+ for word, next_word in zip(words, words[1:]):  # consecutive (word, successor) pairs
+ freq[word].append(next_word)  # repeats are kept, so a successor appears once per occurrence
+ return freq
+
+def freq_following_word(txt):  # reduce following_words(txt) to a single most frequent successor per word
+ following = following_words(txt)
+ for f in following:
+ vals = collections.Counter(following[f])  # successor -> number of occurrences
+ s = sorted(vals.most_common(), key = lambda x: (-x[1], x[0]))  # highest count first; ties go to the smaller string
+ following[f] = s[0][0]  # replace the successor list in place with the winning word
+ return following
+
+def text(word, freq, num):
+ words = []
+ for i in range(num):
+ words.append(word)
+ word = freq[word]
+ return ' '.join(words)
+
+
+import urllib.request  # only referenced by the commented-out download below
+txt = 'danes je lep dan danes sije sonce danes sije dan ki je sonce'  # small inline sample text
+#urllib.request.urlopen('http://squeeb1134.tripod.com/1984.txt').read().decode('utf8')
+print (text('danes', freq_following_word(txt), 5))  # text() in this file takes the precomputed successor mapping, not the raw text