Deploying to gh-pages from @ 28d2ad1 🚀

tereom · Oct 31, 2024 · a97063d · a97063d
1 parent aac7d84
commit a97063d
Show file tree

Hide file tree

Showing 26 changed files with 891 additions and 832 deletions.
diff --git a/01-exploratorio.md b/01-exploratorio.md
diff --git a/01-exploratorio_files/figure-html/unnamed-chunk-11-1.png b/01-exploratorio_files/figure-html/unnamed-chunk-11-1.png
diff --git a/01-exploratorio_files/figure-html/unnamed-chunk-30-1.png b/01-exploratorio_files/figure-html/unnamed-chunk-30-1.png
diff --git a/01-exploratorio_files/figure-html/unnamed-chunk-31-1.png b/01-exploratorio_files/figure-html/unnamed-chunk-31-1.png
diff --git a/01-exploratorio_files/figure-html/unnamed-chunk-33-1.png b/01-exploratorio_files/figure-html/unnamed-chunk-33-1.png
diff --git a/02-tipos-de-estudio_files/figure-html/grafica-pcr-1.png b/02-tipos-de-estudio_files/figure-html/grafica-pcr-1.png
diff --git a/05-remuestreo.md b/05-remuestreo.md
@@ -2098,18 +2098,18 @@ muestra_computos
 
 ```
 ## # A tibble: 10,000 × 23
-##    state_code state_name   state_abbr district_loc_17 district_fed_17 polling_id
-##    <chr>      <chr>        <chr>                <int>           <int>      <int>
-##  1 15         México       MEX                     45              40      77364
-##  2 28         Tamaulipas   TAM                      6               9     126685
-##  3 18         Nayarit      NAY                      7               2      86587
-##  4 09         Ciudad de M… CDMX                     1               1      22214
-##  5 29         Tlaxcala     TLAX                    10               1     127221
-##  6 13         Hidalgo      HGO                      5               2      49224
-##  7 07         Chiapas      CHPS                    15              10      13835
-##  8 19         Nuevo León   NL                       3               6      90042
-##  9 25         Sinaloa      SIN                      6               3     113407
-## 10 15         México       MEX                     22              17      67597
+##    state_code state_name state_abbr district_loc_17 district_fed_17 polling_id
+##    <chr>      <chr>      <chr>                <int>           <int>      <int>
+##  1 18         Nayarit    NAY                     11               2      86709
+##  2 27         Tabasco    TAB                     17               5     122035
+##  3 15         México     MEX                      7              35      75477
+##  4 27         Tabasco    TAB                     19               5     122262
+##  5 17         Morelos    MOR                      6               2      84733
+##  6 07         Chiapas    CHPS                    22               5      15376
+##  7 14         Jalisco    JAL                      2               2      52634
+##  8 08         Chihuahua  CHIH                     7               4      19097
+##  9 14         Jalisco    JAL                     20              20      60549
+## 10 13         Hidalgo    HGO                     11               4      50221
 ## # ℹ 9,990 more rows
 ## # ℹ 17 more variables: section <int>, region <chr>, polling_type <chr>,
 ## #   section_type <chr>, pri_pvem <int>, pan <int>, panal <int>,
@@ -2173,16 +2173,16 @@ as.data.frame(first_computos_boot)
 ## # A tibble: 10,000 × 23
 ##    state_code state_name   state_abbr district_loc_17 district_fed_17 polling_id
 ##    <chr>      <chr>        <chr>                <int>           <int>      <int>
-##  1 14         Jalisco      JAL                     14              14      57620
-##  2 31         Yucatán      YUC                      7               3     139226
-##  3 14         Jalisco      JAL                     14              14      57998
-##  4 16         Michoacán    MICH                    13               3      79390
-##  5 21         Puebla       PUE                     13               7     101237
-##  6 14         Jalisco      JAL                     12              12      57145
-##  7 10         Durango      DGO                      4               1      34549
-##  8 11         Guanajuato   GTO                     20              10      42863
-##  9 03         Baja Califo… BCS                      9               1       5541
-## 10 14         Jalisco      JAL                     16              16      58584
+##  1 07         Chiapas      CHPS                    13               9      13397
+##  2 14         Jalisco      JAL                     15              15      58404
+##  3 09         Ciudad de M… CDMX                    11              13      26471
+##  4 17         Morelos      MOR                      7               3      84909
+##  5 25         Sinaloa      SIN                     11               3     114038
+##  6 11         Guanajuato   GTO                     16              12      41506
+##  7 29         Tlaxcala     TLAX                     6               3     128006
+##  8 02         Baja Califo… BC                       8               5       3901
+##  9 02         Baja Califo… BC                       9               5       3779
+## 10 08         Chihuahua    CHIH                    19               5      19536
 ## # ℹ 9,990 more rows
 ## # ℹ 17 more variables: section <int>, region <chr>, polling_type <chr>,
 ## #   section_type <chr>, pri_pvem <int>, pan <int>, panal <int>,

diff --git a/06-max-verosimilitud.md b/06-max-verosimilitud.md
@@ -253,6 +253,23 @@ estas probabilidades para cada observación $r_1$:
 
 
 ``` r
+# creamos la función de verosimilitud con los datos observados como dados
+verosim <- function(p) {
+  r <- c(1, 2, 0, 3, 0, 0, 0, 2, 1, 0, 3)
+  q_func <- 0.03^r*(0.97)^(10-r)
+  q_falla <- 0.2^r*(0.8)^(10-r)
+  prod(p * q_func + (1 - p) * q_falla)
+}
+
+verosim(0.1)
+```
+
+
+``` r
+# Una alternativa que nos da más flexibilidad para generar la función de 
+# verosimilitud, es crear una función que recibe los datos observados y nos 
+# regresa la función de verosimilitud correspondiente
+# Entonces, calc_verosim es una función que regresa una función
 calc_verosim <- function(r){
   q_func <- 0.03 ^ r * (0.97) ^ (10 - r)
   q_falla <- 0.2 ^ r * (0.8) ^ (10 - r)
@@ -271,6 +288,7 @@ verosim(0.1)
 ```
 
 
+
 ``` r
 dat_verosim <- tibble(x = seq(0, 1, 0.001)) %>% 
   mutate(prob = map_dbl(x, verosim))
@@ -279,7 +297,7 @@ ggplot(dat_verosim, aes(x = x, y = prob)) + geom_line() +
   xlab("prop funcionado")
 ```
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-5-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-6-1.png" width="480" style="display: block; margin: auto;" />
 
 Y nuestra estimación puntual sería de alrededor de 80%.
 
@@ -391,13 +409,14 @@ ggplot(dat_verosim, aes(x = x, y = log_prob)) + geom_line() +
   xlab("p")
 ```
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-8-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-9-1.png" width="480" style="display: block; margin: auto;" />
 
 Obtenemos el mismo máximo. Podemos incluso resolver numéricamente:
 
 
 ``` r
-solucion <- optim(p = 0.5, log_verosimilitud, control = list(fnscale = -1))
+solucion <- optim(p = 0.5, log_verosimilitud, control = list(fnscale = -1), 
+                  method = "Brent", lower = 0, upper = 1)
 solucion$par
 ```
 
@@ -426,6 +445,24 @@ log_verosim(0.1)
 ## [1] -31.24587
 ```
 
+``` r
+solucion <- optim(p = 0.2, log_verosim, control = list(fnscale = -1), 
+                  method = "Brent", lower = 0, upper = 1)
+solucion$par
+```
+
+```
+## [1] 0.7733766
+```
+
+``` r
+solucion$convergence
+```
+
+```
+## [1] 0
+```
+
 
 ``` r
 dat_verosim <- tibble(x = seq(0,1, 0.001)) %>% mutate(log_verosimilitud = map_dbl(x, log_verosim))
@@ -434,7 +471,7 @@ ggplot(dat_verosim, aes(x = x, y = log_verosimilitud)) + geom_line() +
   xlab("prop funcionado")
 ```
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-11-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-12-1.png" width="480" style="display: block; margin: auto;" />
 
 Nótese que la verosimilitud la consideramos **función de los parámetros**,
 donde **los datos están fijos**.
@@ -475,12 +512,12 @@ tibble(x = seq(0,1,0.001)) %>%
     geom_vline(xintercept = 0.5, color = 'red')
 ```
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-14-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-15-1.png" width="480" style="display: block; margin: auto;" />
 
 **Ejemplo.** Supongamos que en una población de transacciones hay un porcentaje $p$ (desconocido) 
 que son fraudulentas. Tenemos un sistema de clasificación humana que marca transacciones como sospechosas. 
 Con este sistema hemos medido que la proporción de transacciones normales que son marcadas como sospechosas es de 0.1%, y que la proporción de transacciones fraudulentas que son marcadas
-como sospechosas es de 98%. Supongamos que extraemos una muestra de 2000 transacciones, de manera que todas ellas tiene la misma probabilidad de ser fraudulentas. El sistema de clasificación marca 4 transacciones como fraudulentas. ¿Cómo estimamos la proporción de transacciones fraudulentas en la población?
+como sospechosas es de 98%. Supongamos que extraemos una muestra de 2000 transacciones, de manera que todas ellas tienen la misma probabilidad de ser fraudulentas. El sistema de clasificación marca 4 transacciones como sospechosas. ¿Cómo estimamos la proporción de transacciones fraudulentas en la población?
 
 Solución: sea $p$ la proporción de transacciones fraudulentas. Entonces la probabilidad
 de que una transacción sea marcada como sospechosa es (proba total):
@@ -515,12 +552,12 @@ log_verosim <- crear_log_verosim(n = 2000, n_sosp = 4)
 
 A continuación la mostramos de manera gráfica.
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-17-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-18-1.png" width="480" style="display: block; margin: auto;" />
 
 No se ve muy claro dónde ocurre el máximo, pero podemos ampliar cerca de cero la 
 misma gráfica:
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-18-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-19-1.png" width="480" style="display: block; margin: auto;" />
 
 
 
@@ -600,7 +637,7 @@ ggplot() +
   geom_rug(data = tibble(x = x), aes(x = x), colour = "red")
 ```
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-22-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-23-1.png" width="480" style="display: block; margin: auto;" />
 
 Podemos escribir en una fórmula como:
 
@@ -691,7 +728,8 @@ Ahora optimizamos:
 
 
 ``` r
-res <- optim(c(0, 0.5), log_p, control = list(fnscale = -1, maxit = 1000), method = "Nelder-Mead")
+res <- optim(c(0, 0.5), log_p, control = list(fnscale = -1, maxit = 1000), 
+             method = "Nelder-Mead")
 res$convergence
 ```
 
@@ -717,7 +755,7 @@ es el que esperábamos (y que puedes derivar analíticamente):
 
 ``` r
 n <- length(muestra)
-sd_n <- function(x) sqrt( mean((x - mean(x))^2))
+sd_n <- function(x) sqrt(mean((x - mean(x))^2))
 c(media = mean(muestra), sigma = sd_n(muestra)) %>% round(4)
 ```
 
@@ -757,7 +795,7 @@ muestra <- map_dbl(1:200, ~ sim_formas(0.3, 0.75))
 qplot(muestra)
 ```
 
-<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-29-1.png" width="480" style="display: block; margin: auto;" />
+<img src="06-max-verosimilitud_files/figure-html/unnamed-chunk-30-1.png" width="480" style="display: block; margin: auto;" />
 
 Supongamos que no conocemos la probabilidad de contestar correctamente ni la
 proporción de estudiantes que contestó al azar. ¿Cómo estimamos estas dos cantidades?

diff --git a/..._files/figure-html/unnamed-chunk-11-1.png → ..._files/figure-html/unnamed-chunk-12-1.png b/..._files/figure-html/unnamed-chunk-11-1.png → ..._files/figure-html/unnamed-chunk-12-1.png
diff --git a/..._files/figure-html/unnamed-chunk-14-1.png → ..._files/figure-html/unnamed-chunk-15-1.png b/..._files/figure-html/unnamed-chunk-14-1.png → ..._files/figure-html/unnamed-chunk-15-1.png
diff --git a/06-max-verosimilitud_files/figure-html/unnamed-chunk-17-1.png b/06-max-verosimilitud_files/figure-html/unnamed-chunk-17-1.png
diff --git a/06-max-verosimilitud_files/figure-html/unnamed-chunk-18-1.png b/06-max-verosimilitud_files/figure-html/unnamed-chunk-18-1.png
diff --git a/06-max-verosimilitud_files/figure-html/unnamed-chunk-19-1.png b/06-max-verosimilitud_files/figure-html/unnamed-chunk-19-1.png
diff --git a/..._files/figure-html/unnamed-chunk-22-1.png → ..._files/figure-html/unnamed-chunk-23-1.png b/..._files/figure-html/unnamed-chunk-22-1.png → ..._files/figure-html/unnamed-chunk-23-1.png
diff --git a/..._files/figure-html/unnamed-chunk-29-1.png → ..._files/figure-html/unnamed-chunk-30-1.png b/..._files/figure-html/unnamed-chunk-29-1.png → ..._files/figure-html/unnamed-chunk-30-1.png
diff --git a/...d_files/figure-html/unnamed-chunk-5-1.png → ...d_files/figure-html/unnamed-chunk-6-1.png b/...d_files/figure-html/unnamed-chunk-5-1.png → ...d_files/figure-html/unnamed-chunk-6-1.png
diff --git a/...d_files/figure-html/unnamed-chunk-8-1.png → ...d_files/figure-html/unnamed-chunk-9-1.png b/...d_files/figure-html/unnamed-chunk-8-1.png → ...d_files/figure-html/unnamed-chunk-9-1.png