東京大学 情報理工学系研究科 創造情報学専攻 2021年8月実施 プログラミング 第2問
Author
Description (English)
We store the daily numbers of newly infected people with some virus (new infections, below) in a text file in chronological order. The numbers are separated by colons (:). For example, when the numbers of new infections for 5 days are
621 591 907 1121 1032
the following text
621:591:907:1121:1032
is stored in a file.
Problem
(1) Let infections.txt. We define the following function
where
Calculate the maximum and minimum values of
(2) Let
where
Among arbitrary pairs of two files in the folder data, find the pair with the highest similarity score and write the two file names on the answer sheet. Furthermore, write the similarity score on the answer sheet. When more than one such pair is found, write all the pairs and their scores.
(3) Let infections2.txt. These numbers are denoted by
We find the approximate formula
where
Calculate
(4) Let infections2.txt. For a given
We find the approximate formula
for
Find
Kai
The sample data files are here.
(1)
itsuitsuki's solution
import numpy as np
conv_result = np.convolve(orig_infelst, np.array([1,1,1,1,1,1,1])/7, 'valid')
print(conv_result.max(), conv_result.min(), conv_result.sum())
The maximum is 1924.4286 and minimum 3.5714. The sum is 165579.5714.
FunTotal's solution
/*
按照题意模拟,c++要注意精度问题,由于没有数据文件,不知道题目的精度要求怎么样,保险起见用long long和long double尽量保持精度,除法也是最后再除
*/
#include <bits/stdc++.h>
#define int long long
#define db long double
using namespace std;
void solve() {
ifstream fin("E:/UTokyo_Entrance_Exam/CI/2022_summer/infections.txt",
ios::in);
ofstream fout("E:/UTokyo_Entrance_Exam/CI/2022_summer/ans21.txt", ios::out);
if (!fin.is_open())
assert(0);
string str;
fin >> str;
vector<int> vec; // 先处理文件输入读到vec里面
int num = 0;
for (auto ch : str) { // 直接暴力逐字符读取, 可能有直接用库以冒号切割的方法
if (ch == ':') {
vec.push_back(num);
num = 0;
} else
num = num * 10 + ch - '0';
}
vec.push_back(num);
db mx = -1e9, mn = 1e9;
int sum = 0, n = vec.size();
for (int i = 3; i < n - 3; i++) {
int tem = 0;
for (int k = -3; k <= 3; k++)
tem += vec[i + k];
db ave = (db) tem / 7.0;
mx = max(mx, ave), mn = min(mn, ave);
sum += tem; //c++有浮点误差,尽量用整数,最后再统一除7.0
}
fout << fixed << setprecision(4) << "max ave = " << mx << "\n"
<< "min ave = " << mn << "\n"
<< "sum of ave = " << (db)((db)sum / 7.0) << "\n";
}
signed main() {
int t = 1;
// cin >> t;
while (t--)
solve();
return 0;
}
(2)
itsuitsuki's solution
def similarity_between_lists(l1, l2):
l1 = np.array(l1)
l2 = np.array(l2)
if len(l1) < len(l2):
t = l1
l1 = l2
l2 = t
s = float('-inf')
for i in range(len(l1) - len(l2) + 1):
slice_1 = l1[i:i+len(l2)]
tmp = ((slice_1 - l2)**2).sum()
s = max(s, -tmp)
return int(s)
all_pairs = []
lists = [open(tmp).readline().split(':') for tmp in filelist]
lists = [[int(s) for s in l] for l in lists]
for i in range(len(lists)):
for j in range(i+1, len(lists)):
all_pairs += [(similarity_between_lists(lists[i], lists[j]), i, j)]
maximal = max(all_pairs)
print(maximal)
for pair in all_pairs:
if pair[0] == maximal[0]:
print(pair, filelist[pair[1]], filelist[pair[2]])
The highest similarity score is -6294. With data02.txt and data42.txt.
FunTotal's solution
/*
这题也是复用前面遍历文件夹的方法,按照题目的公式模拟一下
*/
#include <bits/stdc++.h>
#define int long long
#define pss pair<string, string>
using namespace std;
namespace fs = filesystem;
vector<int> readfile(string path) {
ifstream fin(path, ios::in);
if (!fin.is_open())
assert(0);
string str;
fin >> str;
vector<int> vec; // 先处理文件输入读到vec里面
int num = 0;
for (auto ch : str) { // 直接暴力逐字符读取, 可能有直接用库以冒号切割的方法
if (ch == ':') {
vec.push_back(num);
num = 0;
} else
num = num * 10 + ch - '0';
}
vec.push_back(num);
return vec;
}
int cal(string path1, string path2) {
vector<int> x = readfile(path1), y = readfile(path2);
if (x.size() < y.size()) swap(x, y);
int m = x.size(), n = y.size();
int point = 1e18;
for (int i = 0; i <= m - n; i++) {
int tem = 0;
for (int k = 0; k <= n - 1; k++)
tem += (x[k + i] - y[k]) * (x[k + i] - y[k]);
point = min(point, tem);
}
return -point;
}
void solve() {
string folder_path = "E:/UTokyo_Entrance_Exam/CI/2022_summer/data_forder/";
ofstream fout("E:/UTokyo_Entrance_Exam/CI/2022_summer/ans22.txt", ios::out);
vector<string> paths;
for (const auto& entry : fs::directory_iterator(folder_path)) {
if (entry.is_regular_file()) {
string file_path = entry.path().string();
try {
paths.push_back(file_path);
} catch (const exception& e) {
cout << "Error processing file " << file_path << ": "
<< e.what() << endl;
}
}
}
int mxpoint = -1e18;
vector<pss> res;
for (int i = 0; i < paths.size(); i++)
for (int j = i + 1; j < paths.size(); j++) {
int point = cal(paths[i], paths[j]);
if (point > mxpoint) mxpoint = point, res.clear(), res.push_back({paths[i], paths[j]});
else if (point == mxpoint) res.push_back({paths[i], paths[j]});
}
for (auto [a, b] : res)
fout << "file names : " << a << " " << "and " << b << "\n";
fout << "their scores are " << mxpoint << "\n";
}
signed main() {
int t = 1;
// cin >> t;
while (t--)
solve();
return 0;
}
(3)
itsuitsuki's solution
with open('infections2.txt') as f_infe:
infelst2 = f_infe.readline().split(':')
infelst2 = [int(s) for s in infelst2]
Y = np.array(infelst2)
n = len(infelst2)
X = np.arange(n)
all_1 = np.ones(n)
a = ((n * X @ Y) - X.sum() * Y.sum()) / (n * (X@X) - X.sum()**2)
print(float(a))
k = ((X@X) * Y.sum() - (X@Y) * X.sum()) / (n * (X@X) - (X.sum())**2)
print(float(k))
FunTotal's solution
/*
题目给出最小二乘法公式,按照题目计算一下,同样注意精度问题
*/
#include <bits/stdc++.h>
#define int long long
#define db long double
using namespace std;
void solve() {
ifstream fin("E:/UTokyo_Entrance_Exam/CI/2022_summer/infections2.txt",
ios::in);
ofstream fout("E:/UTokyo_Entrance_Exam/CI/2022_summer/ans23.txt", ios::out);
if (!fin.is_open())
assert(0);
string str;
fin >> str;
vector<int> vec; // 先处理文件输入读到vec里面
int num = 0;
for (auto ch : str) { // 直接暴力逐字符读取, 可能有直接用库以冒号切割的方法
if (ch == ':') {
vec.push_back(num);
num = 0;
} else
num = num * 10 + ch - '0';
}
vec.push_back(num);
int sum_ixi = 0, sum_i = 0, sum_i2 = 0, sum_xi = 0;
int n = vec.size();
for (int i = 0; i < n - 1; i++) {
sum_ixi += i * vec[i];
sum_i += i;
sum_xi += vec[i];
sum_i2 += i * i;
}
db a = ((db)n * sum_ixi - sum_i * sum_xi) / (n * sum_i2 - (sum_i) * (sum_i));
db k = ((db)sum_i2 * sum_xi - sum_ixi * sum_i) / (n * sum_i2 - sum_i * sum_i);
fout << fixed << setprecision(4) << "a = " << a << ", k = " << k << "\n";
}
signed main() {
int t = 1;
// cin >> t;
while (t--)
solve();
return 0;
}
(4)
itsuitsuki's solution
By plugging in the paradigm of (3), since
And we find the max of
def metric_a_k(l, s): # sublen=31
sub = l[s:s+31]
X = np.arange(31)
Y = np.log(sub + 1)
loga = ((n * X @ Y) - X.sum() * Y.sum()) / (n * (X@X) - X.sum()**2)
logk = ((X@X) * Y.sum() - (X@Y) * X.sum()) / (n * (X@X) - (X.sum())**2)
return np.exp(loga), np.exp(logk)
infelst2 = np.array(infelst2)
a_s = []
k_s = []
for s in range(len(infelst2) - 30):
a, k = metric_a_k(infelst2, s)
a_s += [a]
k_s += [k]
max_a = max(a_s)
for i, a in enumerate(a_s):
if a == max_a:
print(i,a,k_s[i]) # s,a,k
(s,a,k) is (389, 1.4010, 1.1299).
FunTotal's solution
/*
这题有点思维含量,要注意到合理利用第三问的公式,取对数转换为第三问的线性问题。要注意c++没有loge函数,还需要用换底公式 loge x = log2(x) / log2(e), c++中的exp(x)函数是 e ^ x, exp(1) 即为自然对数e
*/
#include <bits/stdc++.h>
#define int long long
#define db long double
#define pii pair<int, int>
using namespace std;
vector<int> readfile(string path) {
ifstream fin(path, ios::in);
if (!fin.is_open())
assert(0);
string str;
fin >> str;
vector<int> vec; // 先处理文件输入读到vec里面
int num = 0;
for (auto ch : str) { // 直接暴力逐字符读取, 可能有直接用库以冒号切割的方法
if (ch == ':') {
vec.push_back(num);
num = 0;
} else
num = num * 10 + ch - '0';
}
vec.push_back(num);
return vec;
}
pii calak(vector<db> vec) { // 给定 x 求线性的最小拟合的 a 和 k
db sum_ixi = 0, sum_i = 0, sum_i2 = 0, sum_xi = 0;
int n = vec.size();
for (int i = 0; i < n - 1; i++) {
sum_ixi += i * vec[i];
sum_i += i;
sum_xi += vec[i];
sum_i2 += i * i;
}
db a =
((db)n * sum_ixi - sum_i * sum_xi) / (n * sum_i2 - (sum_i) * (sum_i));
db k =
((db)sum_i2 * sum_xi - sum_ixi * sum_i) / (n * sum_i2 - sum_i * sum_i);
return {exp(a) * 10000, exp(k) * 10000};
}
void solve() {
ofstream fout("E:/UTokyo_Entrance_Exam/CI/2022_summer/ans24.txt", ios::out);
vector<int> vec = readfile("E:/UTokyo_Entrance_Exam/CI/2022_summer/infections3.txt");
vector<array<int, 3>> res;
int mxa = -1e18;
int n = vec.size();
for (int s = 0; s < n - 30; s++) {
vector<db> x;
for (int i = s; i <= s + 30; i++)
x.push_back(log(vec[i] + 1) / log(exp(1)));
auto [a, k] = calak(x);
if (a > mxa) mxa = a, res.clear(), res.push_back({s, a, k});
else if (a == mxa) res.push_back({s, a, k});
}
for (auto [s, a, k] : res) {
fout << fixed << setprecision(4) << "s = " << s << ", a = " << a / 1e4 << ", k = " << k / 1e4 << "\n";
}
}
signed main() {
int t = 1;
// cin >> t;
while (t--)
solve();
return 0;
}